diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -118,11 +118,7 @@
   GISelKnownBitsAnalysis() : MachineFunctionPass(ID) {
     initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry());
   }
-  GISelKnownBits &get(MachineFunction &MF) {
-    if (!Info)
-      Info = std::make_unique<GISelKnownBits>(MF);
-    return *Info.get();
-  }
+  GISelKnownBits &get(MachineFunction &MF);
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnMachineFunction(MachineFunction &MF) override;
   void releaseMemory() override { Info.reset(); }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -35,6 +35,7 @@
   MachineIRBuilder &Builder;
   MachineRegisterInfo &MRI;
   const LegalizerInfo &LI;
+  GISelKnownBits *KB;
 
   static bool isArtifactCast(unsigned Opc) {
     switch (Opc) {
@@ -50,8 +51,9 @@
 public:
   LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
-                               const LegalizerInfo &LI)
-      : Builder(B), MRI(MRI), LI(LI) {}
+                               const LegalizerInfo &LI,
+                               GISelKnownBits *KB = nullptr)
+      : Builder(B), MRI(MRI), LI(LI), KB(KB) {}
 
   bool tryCombineAnyExt(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
@@ -131,13 +133,20 @@
     LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
     LLT SrcTy = MRI.getType(SrcReg);
     APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
-    auto Mask = Builder.buildConstant(
-        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
     if (SextSrc && (DstTy != MRI.getType(SextSrc)))
       SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
     if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
       TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+    APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits());
+    Register AndSrc = SextSrc ? SextSrc : TruncSrc;
+    // Elide AND if it is proven redundant (e.g., between boolean uses and defs)
+    if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) {
+      replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs,
+                            Observer);
+    } else {
+      auto Mask = Builder.buildConstant(DstTy, ExtMaskVal);
+      Builder.buildAnd(DstReg, AndSrc, Mask);
+    }
     markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
     return true;
   }
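The elision test above is pure mask arithmetic on known bits. As a minimal standalone sketch (plain `uint64_t` standing in for `APInt`; illustrative only, not LLVM code): the AND is a no-op exactly when every bit the mask would clear is already known to be zero, which is what `(KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()` checks.

```cpp
#include <cassert>
#include <cstdint>

// An AND with Mask changes nothing iff the union of known-zero bits and
// mask bits covers the whole word.
bool andIsRedundant(uint64_t KnownZero, uint64_t Mask) {
  return (KnownZero | Mask) == ~uint64_t(0);
}

int main() {
  // A boolean zero-extended to 64 bits: bits 63..1 are known zero, so
  // masking with 0x1 is redundant -- the "boolean uses and defs" case the
  // comment above refers to.
  assert(andIsRedundant(~uint64_t(1), 0x1));
  // Nothing known about the source: the AND must stay.
  assert(!andIsRedundant(0, 0x1));
  return 0;
}
```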
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "gisel-known-bits"
 
@@ -773,3 +774,12 @@
 bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
+
+GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
+  if (!Info) {
+    unsigned MaxDepth =
+        MF.getTarget().getOptLevel() == CodeGenOpt::Level::None ? 2 : 6;
+    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
+  }
+  return *Info.get();
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,7 +218,7 @@
   // This will keep all the observers notified about new insertions/deletions.
   RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
   LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
-  LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
+  LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB);
   bool Changed = false;
   SmallVector<MachineInstr *, 128> RetryList;
   do {
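`get()` now picks a smaller recursion depth at `-O0` (2) than at higher optimization levels (6): known-bits queries walk the def chains of each operand, so the cutoff bounds compile time at the cost of precision. A self-contained toy (hypothetical `Node` graph, not the `GISelKnownBits` API) showing how a depth limit makes the answer more conservative:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy def graph: each node is a leaf, an AND with an immediate mask, or a
// left shift by an immediate amount.
struct Node {
  enum Kind { Leaf, And, Shl } K;
  int Lhs = -1;     // operand index into the graph
  uint64_t Imm = 0; // mask for And, shift amount for Shl
};

// Depth-limited known-zero-bits query, mirroring the MaxDepth cutoff above.
uint64_t knownZero(const std::vector<Node> &G, int N, unsigned Depth,
                   unsigned MaxDepth) {
  if (Depth >= MaxDepth)
    return 0; // give up: nothing is known beyond the cutoff
  const Node &Nd = G[N];
  switch (Nd.K) {
  case Node::Leaf:
    return 0;
  case Node::And: // bits cleared by the mask are known zero
    return knownZero(G, Nd.Lhs, Depth + 1, MaxDepth) | ~Nd.Imm;
  case Node::Shl: // low bits shifted in are known zero
    return (knownZero(G, Nd.Lhs, Depth + 1, MaxDepth) << Nd.Imm) |
           ((uint64_t(1) << Nd.Imm) - 1);
  }
  return 0;
}

int main() {
  // v0 = leaf; v1 = and v0, 0xff; v2 = shl v1, 8; v3 = shl v2, 8
  std::vector<Node> G = {{Node::Leaf, -1, 0},
                         {Node::And, 0, 0xff},
                         {Node::Shl, 1, 8},
                         {Node::Shl, 2, 8}};
  // A deep walk sees through all three defs; a shallow one gives up early.
  std::cout << std::hex << knownZero(G, 3, 0, 6) << '\n'; // ffffffffff00ffff
  std::cout << std::hex << knownZero(G, 3, 0, 2) << '\n'; // ffff
  return 0;
}
```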
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -926,7 +926,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -949,7 +948,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -972,7 +970,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -995,7 +992,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1018,7 +1014,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1306,7 +1301,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1321,7 +1315,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1336,7 +1329,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1351,7 +1343,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1366,7 +1357,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1706,7 +1696,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1728,7 +1717,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1750,7 +1738,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1772,7 +1759,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1794,7 +1780,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -2081,7 +2066,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2095,7 +2079,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2109,7 +2092,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2123,7 +2105,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2137,7 +2118,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -5392,9 +5372,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5415,9 +5393,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5438,9 +5414,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5461,9 +5435,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5484,9 +5456,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5507,9 +5477,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5529,9 +5497,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5551,9 +5517,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5573,9 +5537,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5595,9 +5557,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5616,9 +5576,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5636,9 +5594,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5656,9 +5612,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5676,9 +5630,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5696,9 +5648,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5716,9 +5666,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5736,9 +5684,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5756,9 +5702,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5776,9 +5720,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5796,9 +5738,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5816,15 +5756,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
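The test deltas above and below show the payoff in two forms: the `-O0` sequences no longer rematerialize the i1 compare result (`and ..., #0x1` followed by `ands ..., #0x1` and `csel ..., ne`), and `csel` instead consumes the flags of the original compare directly (`gt`/`le`/`hi`); the i128 min/max sequences are likewise restructured to select on the flags (`csel ..., eq`) rather than carrying intermediate booleans through `and`/`ands` pairs. The remaining hunks repeat the same pattern across the min and umax orderings and memory orders.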
@@ -5850,15 +5786,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5884,15 +5816,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -8521,15 +8161,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8547,15 +8183,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8573,15 +8205,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8599,15 +8227,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8625,15 +8249,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8652,9 +8272,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, 
#0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8675,9 +8293,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8698,9 +8314,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8721,9 +8335,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8744,9 +8356,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8766,9 +8376,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8787,9 +8395,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8871,9 +8471,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, 
w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8971,9 +8561,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9071,15 +8651,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; 
-O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ ; -O0-LABEL: 
atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, 
#0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -511,7 +511,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -532,7 +531,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: 
atomicrmw_add_i128_aligned_acquire: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -553,7 +551,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -574,7 +571,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -595,7 +591,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 @@ -831,7 +826,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -846,7 +840,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -861,7 +854,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -876,7 +868,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -891,7 +882,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1046,7 +1036,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: casp x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1066,7 +1055,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspa x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1086,7 +1074,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspl x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1106,7 +1093,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1126,7 +1112,6 @@ define dso_local i128 
@atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x2, x9, x11 -; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] ; -O0: eor x8, x10, x8 ; -O0: eor x11, x9, x11 @@ -1361,7 +1346,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1375,7 +1359,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1389,7 +1372,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1403,7 +1385,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1417,7 +1398,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -3951,15 +3931,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -3983,15 +3959,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4015,15 +3987,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: 
csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4047,15 +4015,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4079,15 +4043,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4147,9 +4107,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -4165,9 +4123,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -4183,9 +4139,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -4201,9 +4155,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -4219,9 +4171,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -4236,9 +4186,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -4252,9 +4200,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel 
w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -4268,9 +4214,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -4284,9 +4228,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -4300,9 +4242,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -4316,9 +4256,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -4332,9 +4270,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -4348,9 +4284,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -4364,9 +4298,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -4380,9 +4312,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -4396,15 +4326,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, 
ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4422,15 +4348,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4448,15 +4370,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4474,15 +4392,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4500,15 +4414,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4666,15 +4576,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -4698,15 +4604,11 @@ define dso_local i128 
@atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4730,15 +4632,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4762,15 +4660,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4794,15 +4688,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4862,9 +4752,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -4880,9 +4768,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -4898,9 +4784,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -4916,9 +4800,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth 
-; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -4934,9 +4816,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -4951,9 +4831,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -4967,9 +4845,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -4983,9 +4859,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -4999,9 +4873,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -5015,9 +4887,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -5031,9 +4901,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -5047,9 +4915,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -5063,9 +4929,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ 
-5079,9 +4943,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -5095,9 +4957,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -5111,15 +4971,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5137,15 +4993,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5163,15 +5015,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5189,15 +5037,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5215,15 +5059,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5381,15 +5221,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -5413,15 +5249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -5445,15 +5277,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -5477,15 +5305,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -5509,15 +5333,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -5576,9 +5396,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -5593,9 +5411,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -5610,9 +5426,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -5627,9 +5441,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -5644,9 +5456,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -5661,9 +5471,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -5677,9 +5485,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -5693,9 +5499,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -5709,9 +5513,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -5725,9 +5527,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -5741,9 +5541,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -5757,9 +5555,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -5773,9 +5569,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -5789,9 +5583,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -5805,9 +5597,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -5821,15 +5611,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5847,15 +5633,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5873,15 +5655,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5899,15 +5677,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5925,15 +5699,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6091,15 +5861,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -6123,15 +5889,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -6155,15 +5917,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -6187,15 +5945,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -6219,15 +5973,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -6286,9 +6036,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -6303,9 +6051,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -6320,9 +6066,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -6337,9 +6081,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -6354,9 +6096,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -6371,9 +6111,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -6387,9 +6125,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -6403,9 +6139,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -6419,9 +6153,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -6435,9 +6167,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -6451,9 +6181,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -6467,9 +6195,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -6483,9 +6209,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -6499,9 +6223,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -6515,9 +6237,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -6531,15 +6251,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6557,15 +6273,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6583,15 +6295,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6609,15 +6317,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6635,15 +6339,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
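Across all of the -O0 check updates above, the pattern is the same: the boolean produced by a wide `subs` is no longer re-masked with `and wN, wN, #0x1` before feeding an `ands`/`csel`, and in the simple cases the compare folds directly into the `csel` condition (`le`, `gt`, `hi`, `ls`). The sketch below is a minimal standalone C++ illustration of the invariant behind the deleted mask instructions (it is not LLVM code, and the names are purely illustrative): a comparison result is already 0 or 1, so every bit above bit 0 is known zero and masking with `0x1` is a no-op.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int64_t samples[] = {INT64_MIN, -7, 0, 7, INT64_MAX};
  for (int64_t lhs : samples) {
    for (int64_t rhs : samples) {
      // The compare materializes as 0 or 1: only bit 0 can be set.
      uint32_t isLess = lhs < rhs;
      // Hence the mask the tests used to expect cannot change the value.
      assert((isLess & 0x1u) == isLess);
      // Selecting on the raw flag computes the same min a "csel ..., le"
      // style selection would.
      int64_t smaller = isLess ? lhs : rhs;
      assert(smaller <= lhs && smaller <= rhs);
    }
  }
  return 0;
}
```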
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -556,7 +556,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -579,7 +578,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -602,7 +600,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -625,7 +622,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -648,7 +644,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -886,7 +881,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -901,7 +895,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -916,7 +909,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -931,7 +923,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -946,7 +937,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1181,7 +1171,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1203,7 +1192,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1225,7 +1213,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1247,7 +1234,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1269,7 +1255,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1526,7 +1511,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1540,7 +1524,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1554,7 +1537,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1568,7 +1550,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1582,7 +1563,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -4257,9 +4237,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4280,9 +4258,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4303,9 +4279,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4326,9 +4300,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4349,9 +4321,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4372,9 +4342,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -4394,9 +4362,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -4416,9 +4382,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -4438,9 +4402,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -4460,9 +4422,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -4481,9 +4441,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -4501,9 +4459,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -4521,9 +4477,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -4541,9 +4495,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -4561,9 +4513,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -4581,9 +4531,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -4601,9 +4549,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -4621,9 +4567,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -4641,9 +4585,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -4661,9 +4603,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -4681,15 +4621,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -4715,15 +4651,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -4749,15 +4681,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -4783,15 +4711,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -4817,15 +4741,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -4852,9 +4772,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4875,9 +4793,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4898,9 +4814,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4921,9 +4835,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4944,9 +4856,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -4967,9 +4877,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -4985,9 +4893,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -5003,9 +4909,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -5021,9 +4925,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -5039,9 +4941,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -5056,9 +4956,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -5072,9 +4970,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -5088,9 +4984,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -5104,9 +4998,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -5120,9 +5012,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -5136,9 +5026,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -5152,9 +5040,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -5168,9 +5054,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -5184,9 +5068,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -5200,9 +5082,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -5216,15 +5096,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5242,15 +5118,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5268,15 +5140,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5294,15 +5162,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5320,15 +5184,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5347,9 +5207,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5370,9 +5228,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5393,9 +5249,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5416,9 +5270,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5439,9 +5291,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5462,9 +5312,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5484,9 +5332,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5506,9 +5352,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5528,9 +5372,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5550,9 +5392,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5571,9 +5411,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5591,9 +5429,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5611,9 +5447,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5631,9 +5465,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5651,9 +5483,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5671,9 +5501,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5691,9 +5519,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5711,9 +5537,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5731,9 +5555,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5751,9 +5573,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5771,15 +5591,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5805,15 +5621,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5839,15 +5651,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5873,15 +5681,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5907,15 +5711,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5942,9 +5742,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5965,9 +5763,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5988,9 +5784,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6011,9 +5805,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6034,9 +5826,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6057,9 +5847,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -6075,9 +5863,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -6093,9 +5879,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -6111,9 +5895,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -6129,9 +5911,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -6146,9 +5926,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel 
w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -6162,9 +5940,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -6178,9 +5954,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -6194,9 +5968,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -6210,9 +5982,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -6226,9 +5996,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -6242,9 +6010,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -6258,9 +6024,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -6274,9 +6038,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -6290,9 +6052,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -6306,15 +6066,11 
@@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6332,15 +6088,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6358,15 +6110,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6384,15 +6132,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6410,15 +6154,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6437,9 +6177,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb 
w8, w12, [x11] @@ -6460,9 +6198,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6483,9 +6219,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6506,9 +6240,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6529,9 +6261,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6281,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6572,9 +6300,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6593,9 +6319,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6614,9 +6338,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6635,9 +6357,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6656,9 +6376,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6676,9 +6394,7 @@ define dso_local i32 
@atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6696,9 +6412,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6716,9 +6430,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6736,9 +6448,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6756,9 +6466,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6776,9 +6484,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6796,9 +6502,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6816,9 +6520,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6836,9 +6538,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6856,15 +6556,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, 
w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6890,15 +6586,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6924,15 +6616,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6958,15 +6646,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6992,15 +6676,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7027,9 +6707,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7050,9 +6728,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7073,9 +6749,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, 
#0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7096,9 +6770,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7119,9 +6791,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7141,9 +6811,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -7158,9 +6826,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -7175,9 +6841,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -7192,9 +6856,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -7209,9 +6871,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -7226,9 +6886,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -7242,9 +6900,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -7258,9 +6914,7 @@ define dso_local i32 
@atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -7274,9 +6928,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -7290,9 +6942,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -7306,9 +6956,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -7322,9 +6970,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -7338,9 +6984,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -7354,9 +6998,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -7370,9 +7012,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -7386,15 +7026,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: 
csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7412,15 +7048,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7438,15 +7070,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7464,15 +7092,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7490,15 +7114,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7517,9 +7137,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7540,9 +7158,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7563,9 +7179,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel 
w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7586,9 +7200,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7609,9 +7221,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7631,9 +7241,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7652,9 +7260,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7673,9 +7279,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7694,9 +7298,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7715,9 +7317,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7736,9 +7336,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7756,9 +7354,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7776,9 +7372,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; 
-O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7796,9 +7390,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7816,9 +7408,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7836,9 +7426,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7856,9 +7444,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7876,9 +7462,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7896,9 +7480,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7916,9 +7498,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7936,15 +7516,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7970,15 +7546,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; 
-O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8004,15 +7576,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8038,15 +7606,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8072,15 +7636,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8107,9 +7667,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8130,9 +7688,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8153,9 +7709,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8176,9 +7730,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8199,9 +7751,7 @@ ; -O0-LABEL: 
atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8221,9 +7771,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -8238,9 +7786,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -8255,9 +7801,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -8272,9 +7816,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -8289,9 +7831,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -8306,9 +7846,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -8322,9 +7860,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -8338,9 +7874,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -8354,9 +7888,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; 
-O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -8370,9 +7902,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -8386,9 +7916,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -8402,9 +7930,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -8418,9 +7944,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -8434,9 +7958,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -8450,9 +7972,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -8466,15 +7986,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8492,15 +8008,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: 
and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8518,15 +8030,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8544,15 +8052,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8570,15 +8074,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll @@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp 
x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, 
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5595,9 +5557,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5616,9 +5576,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5636,9 +5594,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5656,9 +5612,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5676,9 +5630,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5696,9 +5648,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5716,9 +5666,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5736,9 +5684,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5756,9 +5702,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5776,9 +5720,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5796,9 +5738,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5816,15 +5756,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5850,15 +5786,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5884,15 +5816,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8808,9 +8414,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8829,9 +8433,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8850,9 +8452,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -8871,9 +8471,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8891,9 +8489,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8911,9 +8507,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8931,9 +8525,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8951,9 +8543,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -8971,9 +8561,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -8991,9 +8579,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr 
%ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9011,9 +8597,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9031,9 +8615,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9051,9 +8633,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -9071,15 +8651,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9105,15 +8681,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, 
#0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel 
w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ 
-9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, 
x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll @@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs 
w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: 
sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: 
subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6056,9 +5970,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6079,9 +5991,7 @@ ; 
-O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6102,9 +6012,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -6120,9 +6028,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -6138,9 +6044,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -6156,9 +6060,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -6174,9 +6076,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -6191,9 +6091,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -6271,9 +6161,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -6351,15 +6231,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr 
%ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6482,9 +6342,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6505,9 +6363,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6528,9 +6384,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6405,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6574,9 +6426,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6597,9 +6447,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, 
w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6619,9 +6467,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6641,9 +6487,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6663,9 +6507,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6685,9 +6527,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6706,9 +6546,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6806,9 +6636,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: 
stlxr w10, x12, [x11] @@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6906,15 +6726,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6974,15 +6786,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, 
w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9653,15 +9165,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9679,15 +9187,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9705,15 +9209,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -546,7 +546,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -567,7 +566,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -588,7 +586,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -609,7 +606,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -630,7 +626,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -866,7 +861,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -881,7 +875,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -896,7 +889,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -911,7 +903,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -926,7 +917,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1081,7 +1071,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1101,7 +1090,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1121,7 +1109,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1141,7 +1128,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1161,7 +1147,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1396,7 +1381,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1410,7 +1394,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1424,7 +1407,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1438,7 +1420,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1452,7 +1433,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -4076,15 +4056,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -4108,15 +4084,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -4140,15 +4112,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -4172,15 +4140,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4204,15 +4168,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4272,9 +4232,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -4290,9 +4248,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -4308,9 +4264,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -4326,9 +4280,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -4344,9 +4296,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -4361,9 +4311,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -4377,9 +4325,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -4393,9 +4339,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -4409,9 +4353,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -4425,9 +4367,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -4441,9 +4381,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -4457,9 +4395,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -4473,9 +4409,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -4489,9 +4423,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -4505,9 +4437,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -4521,15 +4451,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4547,15 +4473,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4573,15 +4495,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4599,15 +4517,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4625,15 +4539,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4791,15 +4701,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -4823,15 +4729,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -4855,15 +4757,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -4887,15 +4785,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4919,15 +4813,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4987,9 +4877,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -5005,9 +4893,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -5023,9 +4909,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -5041,9 +4925,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -5059,9 +4941,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -5076,9 +4956,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -5092,9 +4970,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -5108,9 +4984,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -5124,9 +4998,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -5140,9 +5012,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -5156,9 +5026,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -5172,9 +5040,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -5188,9 +5054,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, 
x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -5204,9 +5068,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -5220,9 +5082,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -5236,15 +5096,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5262,15 +5118,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5288,15 +5140,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5314,15 +5162,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl 
__atomic_compare_exchange @@ -5340,15 +5184,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5506,15 +5346,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -5538,15 +5374,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -5570,15 +5402,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -5602,15 +5430,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5634,15 +5458,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, 
w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5701,9 +5521,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -5718,9 +5536,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -5735,9 +5551,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -5752,9 +5566,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -5769,9 +5581,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -5786,9 +5596,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -5802,9 +5610,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -5818,9 +5624,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -5834,9 +5638,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, 
w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -5850,9 +5652,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -5866,9 +5666,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -5882,9 +5680,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -5898,9 +5694,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -5914,9 +5708,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -5930,9 +5722,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -5946,15 +5736,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5972,15 +5758,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, 
#0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5998,15 +5780,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6024,15 +5802,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6050,15 +5824,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6216,15 +5986,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -6248,15 +6014,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -6280,15 +6042,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, 
i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -6312,15 +6070,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6344,15 +6098,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6411,9 +6161,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -6428,9 +6176,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -6445,9 +6191,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -6462,9 +6206,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -6479,9 +6221,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, 
w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -6496,9 +6236,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -6512,9 +6250,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -6528,9 +6264,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -6544,9 +6278,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -6560,9 +6292,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -6576,9 +6306,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -6592,9 +6320,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -6608,9 +6334,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -6624,9 +6348,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -6640,9 +6362,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -6656,15 +6376,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6682,15 +6398,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6708,15 +6420,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6734,15 +6442,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6760,15 +6464,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs 
x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll @@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr 
%ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 
-; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, 
i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0:
ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -1770,10 +1770,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb -; CHECK-NOLSE-O0-NEXT: cset w9, le -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, le ; CHECK-NOLSE-O0-NEXT: LBB33_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB33_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1843,10 +1840,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb -; CHECK-NOLSE-O0-NEXT: cset w9, gt -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, gt ; CHECK-NOLSE-O0-NEXT: LBB34_2: ; 
%atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB34_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1917,10 +1911,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb -; CHECK-NOLSE-O0-NEXT: cset w9, ls -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ls ; CHECK-NOLSE-O0-NEXT: LBB35_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB35_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1991,10 +1982,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb -; CHECK-NOLSE-O0-NEXT: cset w9, hi -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, hi ; CHECK-NOLSE-O0-NEXT: LBB36_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB36_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2463,10 +2451,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth -; CHECK-NOLSE-O0-NEXT: cset w10, le -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le ; CHECK-NOLSE-O0-NEXT: LBB43_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB43_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2536,10 +2521,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth -; CHECK-NOLSE-O0-NEXT: cset w10, gt -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt ; CHECK-NOLSE-O0-NEXT: LBB44_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB44_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2610,10 +2592,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth -; CHECK-NOLSE-O0-NEXT: cset w10, ls -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls ; CHECK-NOLSE-O0-NEXT: LBB45_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB45_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2684,10 +2663,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth -; CHECK-NOLSE-O0-NEXT: cset w10, hi -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, hi ; CHECK-NOLSE-O0-NEXT: LBB46_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB46_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2763,8 +2739,7 @@ ; CHECK-NOLSE-O0-NEXT: LBB47_3: ; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxtb 
-; CHECK-NOLSE-O0-NEXT: cset w8, eq
-; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
; CHECK-NOLSE-O0-NEXT: ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i8:
@@ -2784,8 +2759,7 @@
; CHECK-LSE-O0-NEXT: casb w0, w2, [x8]
; CHECK-LSE-O0-NEXT: and w8, w0, #0xff
; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxtb
-; CHECK-LSE-O0-NEXT: cset w8, eq
-; CHECK-LSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-LSE-O0-NEXT: cset w1, eq
; CHECK-LSE-O0-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic
ret { i8, i1 } %res
@@ -2829,8 +2803,7 @@
; CHECK-NOLSE-O0-NEXT: LBB48_3:
; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xffff
; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxth
-; CHECK-NOLSE-O0-NEXT: cset w8, eq
-; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-NOLSE-O0-NEXT: cset w1, eq
; CHECK-NOLSE-O0-NEXT: ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i16:
@@ -2850,8 +2823,7 @@
; CHECK-LSE-O0-NEXT: cash w0, w2, [x8]
; CHECK-LSE-O0-NEXT: and w8, w0, #0xffff
; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxth
-; CHECK-LSE-O0-NEXT: cset w8, eq
-; CHECK-LSE-O0-NEXT: and w1, w8, #0x1
+; CHECK-LSE-O0-NEXT: cset w1, eq
; CHECK-LSE-O0-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic
ret { i16, i1 } %res
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir
@@ -2,7 +2,7 @@
# Check that when we combine ZEXT/ANYEXT we assign the correct location.
# CHECK: !8 = !DILocation(line: 23, column: 5, scope: !4)
-# CHECK: G_AND %16, %15, debug-location !8
+# CHECK: G_AND %15, %16, debug-location !8
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir
@@ -2,7 +2,7 @@
# Check that when we combine SEXT we assign the correct debug location.
# CHECK: !9 = !DILocation(line: 36, column: 21, scope: !4)
-# CHECK: G_AND %5, %4, debug-location !9
+# CHECK: G_AND %4, %5, debug-location !9
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
@@ -1,9 +1,7 @@
; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s
define void @foo(i512 %in) {
; CHECK-LABEL: foo:
-; CHECK: subs
-; CHECK-NEXT: cset
-; CHECK-NEXT: tbnz
+; CHECK: cbz
switch i512 %in, label %default [
i512 3923188584616675477397368389504791510063972152790021570560, label %l1
i512 3923188584616675477397368389504791510063972152790021570561, label %l2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll
@@ -38,8 +38,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.continue:
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[C2]](s16), %bb.1, [[C3]](s16), %bb.2
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -95,8 +95,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.continue:
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[ANYEXT]](s16), %bb.1, [[C2]](s16), %bb.2
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
@@ -38,6 +38,7 @@
; TRANSLATED-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
; TRANSLATED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; TRANSLATED-NEXT: G_BR %bb.2
+ ;
; PRESELECTION-LABEL: name: test
; PRESELECTION: bb.1.entry:
; PRESELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
@@ -50,8 +51,8 @@
; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000
; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
- ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; PRESELECTION-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+ ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ANYEXT]], [[C2]]
; PRESELECTION-NEXT: G_BRCOND [[AND]](s32), %bb.3
; PRESELECTION-NEXT: G_BR %bb.2
@@ -69,6 +70,7 @@
; PRESELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
; PRESELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; PRESELECTION-NEXT: G_BR %bb.2
+ ;
; POSTSELECTION-LABEL: name: test
; POSTSELECTION: bb.1.entry:
; POSTSELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -10,9 +10,7 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
%0:_(s64) = COPY $x0
@@ -37,11 +35,8 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
- ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]]
+ ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]]
; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
; CHECK-NEXT: $x2 = COPY [[UADDE2]](s64)
@@ -89,9 +84,7 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
%0:_(s64) = COPY $x0
@@ -119,9 +112,7 @@
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
%0:_(s64) = COPY $x0
@@ -367,15 +358,15 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
- ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
- ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
- ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+ ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
; CHECK-NEXT: RET_ReallyLR implicit $b0
%1:_(s8) = COPY $b0
%2:_(s8) = COPY $b1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir
@@ -145,8 +145,8 @@
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]]
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 28
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
; CHECK: %ext:_(s64) = G_AND [[ANYEXT]], [[C1]]
; CHECK: $x0 = COPY %ext(s64)
; CHECK: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -9,8 +9,8 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[ICMP]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
@@ -50,11 +50,8 @@
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[C1]]
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
- ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: G_BRCOND [[SELECT]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -91,9 +88,7 @@
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
- ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -132,9 +127,7 @@
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -173,9 +166,7 @@
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s64), [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -214,9 +205,7 @@
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -278,9 +267,7 @@
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[XOR6]]
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[XOR7]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR6]](s64), [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: G_BRCOND [[AND16]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -326,9 +313,7 @@
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[XOR2]]
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[XOR3]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: G_BRCOND [[AND8]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir
@@ -292,18 +292,13 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[AND]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C3]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C4]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[AND2]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[UADDO1]]
; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[AND1]](s64)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[UADDO]], [[CTLZ1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[UADDE]], [[C2]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
- ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C5]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C4]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[AND5]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C2]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C4]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[USUBO1]]
; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
@@ -172,8 +172,8 @@
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -211,8 +211,8 @@
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -250,8 +250,8 @@
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
@@ -288,8 +288,8 @@
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $w0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir
@@ -19,6 +19,7 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
; CHECK-CSSC-LABEL: name: s8
; CHECK-CSSC: liveins: $w0
; CHECK-CSSC-NEXT: {{ $}}
@@ -51,6 +52,7 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
; CHECK-CSSC-LABEL: name: s16
; CHECK-CSSC: liveins: $w0
; CHECK-CSSC-NEXT: {{ $}}
@@ -83,6 +85,7 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32)
; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ;
; CHECK-CSSC-LABEL: name: s32
; CHECK-CSSC: liveins: $w0
; CHECK-CSSC-NEXT: {{ $}}
@@ -112,6 +115,7 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ;
; CHECK-CSSC-LABEL: name: s64
; CHECK-CSSC: liveins: $x0
; CHECK-CSSC-NEXT: {{ $}}
@@ -144,6 +148,7 @@
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<4 x s32>) = G_CTPOP [[AND]](<4 x s32>)
; CHECK-NEXT: $q0 = COPY [[CTPOP]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
+ ;
; CHECK-CSSC-LABEL: name: v4s32
; CHECK-CSSC: liveins: $q0
; CHECK-CSSC-NEXT: {{ $}}
@@ -180,6 +185,7 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
+ ;
; CHECK-CSSC-LABEL: name: s35
; CHECK-CSSC: liveins: $x0
; CHECK-CSSC-NEXT: {{ $}}
@@ -218,17 +224,15 @@
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]]
; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]]
; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ ;
; CHECK-CSSC-LABEL: name: s65
; CHECK-CSSC: liveins: $x0, $x1
; CHECK-CSSC-NEXT: {{ $}}
@@ -242,13 +246,10 @@
; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64)
; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-CSSC-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]]
- ; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]]
- ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]]
+ ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]]
; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64)
- ; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]]
- ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]]
+ ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTTZ1]]
+ ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]]
; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir
@@ -6,20 +6,20 @@
bb.0.entry:
; CHECK-LABEL: name: test_div
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
- ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
- ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
- ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]]
- ; CHECK: $w0 = COPY [[SDIV]](s32)
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
- ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
- ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]]
- ; CHECK: $w0 = COPY [[UDIV]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
+ ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]]
+ ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]]
+ ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]]
+ ; CHECK-NEXT: $w0 = COPY [[UDIV]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s8) = G_TRUNC %0(s64)
@@ -43,17 +43,18 @@
; CHECK-LABEL: name: sdiv_v4s32
; CHECK: liveins: $q0, $q1
- ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
- ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]]
- ; CHECK: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]]
- ; CHECK: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]]
- ; CHECK: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32)
- ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]]
+ ; CHECK-NEXT: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]]
+ ; CHECK-NEXT: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]]
+ ; CHECK-NEXT: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SDIV %0, %1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir
@@ -6,12 +6,12 @@
bb.0.entry:
; CHECK-LABEL: name: test_cse_in_legalizer
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
- ; CHECK: $w0 = COPY [[COPY1]](s32)
- ; CHECK: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(s64) = COPY $x0
%1:_(s8) = G_TRUNC %0(s64)
%19:_(s32) = G_ZEXT %1(s8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir
@@ -10,11 +10,11 @@
; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT
; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s8) = G_TRUNC
; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT
- ; CHECK: CSEInfo::Recording new MI G_CONSTANT
; CHECK: CSEInfo::Recording new MI G_TRUNC
+ ; CHECK: CSEInfo::Recording new MI G_CONSTANT
; CHECK: CSEInfo::Recording new MI G_AND
- ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT
; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_TRUNC
+ ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT
; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_AND
; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1
; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir
@@ -24,14 +24,14 @@
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC4]], 1
; CHECK-NEXT: $w0 = COPY [[SEXT_INREG1]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C1]]
; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC6]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC7]], [[C2]]
; CHECK-NEXT: $w0 = COPY [[AND2]](s32)
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
@@ -291,8 +291,8 @@
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %ext(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
@@ -7,7 +7,9 @@
liveins: $x0
; CHECK-LABEL: name: test_freeze_s64
- ; CHECK: %x0:_(s64) = COPY $x0
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %x0:_(s64) = COPY $x0
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %x0
; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
%x0:_(s64) = COPY $x0
@@ -21,7 +23,9 @@
liveins: $q0
; CHECK-LABEL: name: test_freeze_v4s32
- ; CHECK: %q0:_(<4 x s32>) = COPY $q0
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE %q0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>)
; CHECK-NEXT: $x0 = COPY [[UV]](<2 x s32>)
@@ -56,7 +60,9 @@
liveins: $d0
; CHECK-LABEL: name: test_freeze_v2s32
- ; CHECK: %d0:_(<2 x s32>) = COPY $d0
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %d0:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE %d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<2 x s32>)
; CHECK-NEXT: $w0 = COPY [[UV]](s32)
@@ -74,7 +80,9 @@
liveins: $d0
; CHECK-LABEL: name: test_freeze_v8s8
- ; CHECK: %d0:_(<8 x s8>) = COPY $d0
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %d0:_(<8 x s8>) = COPY $d0
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s8>) = G_FREEZE %d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[FREEZE]](<8 x s8>)
; CHECK-NEXT: $w0 = COPY [[UV]](<4 x s8>)
@@ -91,10 +99,12 @@
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: test_freeze_s1
- ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: $x0 = COPY %ext(s64)
%x:_(s1) = G_IMPLICIT_DEF
@@ -108,10 +118,12 @@
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: test_freeze_s2
- ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: $x0 = COPY %ext(s64)
%x:_(s2) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
@@ -21,15 +21,13 @@
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -66,15 +64,13 @@
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
@@ -23,12 +23,10 @@
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -67,12 +65,10 @@
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
@@ -66,9 +66,9 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir
@@ -18,9 +18,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -47,9 +45,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -76,9 +72,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -105,9 +99,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -136,9 +128,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -165,9 +155,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -194,9 +182,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -223,9 +209,7 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
@@ -274,12 +274,12 @@
; CHECK-LABEL: name: test_uitofp_v2s64_v2i1
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -688,8 +688,8 @@
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD %ptr(p0) :: (load (s8))
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: $x0 = COPY %ext(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -6,14 +6,11 @@
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s4
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64)
%0:_(s64) = G_CONSTANT i64 0
@@ -29,7 +26,6 @@
body: |
bb.0:
- ; This isn't legal but we don't support widening the destination type.
; CHECK-LABEL: name: test_merge_s16_s8
; CHECK: %a:_(s32) = COPY $w0
; CHECK-NEXT: %b:_(s32) = COPY $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -131,8 +131,8 @@
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %lhs_wide, [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %rhs_wide, [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs_wide(s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs_wide(s32)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
@@ -227,8 +227,8 @@
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store (s64), align 1)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
@@ -52,8 +52,8 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI %6(p0), %bb.2, [[DEF]](p0), %bb.0
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI %22(s16), %bb.2, [[DEF1]](s16), %bb.0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.3
; CHECK-NEXT: {{ $}}
@@ -71,10 +71,8 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[COPY]]
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI]], [[C2]](s64)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ICMP1]](s32)
- ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.3
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.3
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb10:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
@@ -32,8 +32,7 @@
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]]
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
- ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -51,10 +50,10 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
bb.0:
; Test that we insert legalization artifacts(Truncs here) into the correct BBs
@@ -185,8 +184,7 @@
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -203,10 +201,10 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -281,14 +279,13 @@
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]]
- ;
CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] - ; CHECK-NEXT: $w0 = COPY [[AND2]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x80000000) @@ -342,13 +339,11 @@ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[C]](s16), %bb.0, [[PHI]](s16), %bb.1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: $w0 = COPY [[AND]](s32) @@ -412,8 +407,7 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -433,12 +427,12 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.1, [[TRUNC2]](s16), %bb.2 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[C3]](s16), %bb.2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: @@ -524,31 +518,28 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.0, %22(s16), %bb.1 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[C2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[ADD1]](s32), [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 43 - ; CHECK-NEXT: G_BRCOND [[AND2]](s32), %bb.2 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 43 + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[PHI]](s16), %bb.1, [[TRUNC]](s16), %bb.0 - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] - ; CHECK-NEXT: $w0 = COPY [[AND3]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C6]] + ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: successors: %bb.1(0x40000000), %bb.3(0x40000000) @@ -671,8 +662,8 @@ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 @@ -727,8 +718,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -778,8 +769,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -824,8 +815,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF1]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 @@ -919,8 +910,8 @@ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64) ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2 ; CHECK-NEXT: G_BR %bb.1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir @@ -83,8 +83,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT %ptr(p0) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: $w0 = COPY %ext(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir @@ -96,8 +96,8 @@ ; CHECK-LABEL: name: test_urem_1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir @@ -11,12 +11,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64) @@ -47,14 +46,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; 
CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE3]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -89,8 +86,8 @@ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir @@ -11,9 +11,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64) @@ -42,11 +40,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]] + ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -18,9 +18,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]] - ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]] + ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]] ; CHECK-NEXT: $w0 = COPY %saddsat(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 @@ -46,9 +44,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]] - ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]] + ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]] ; CHECK-NEXT: $x0 = COPY %saddsat(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 @@ -80,9 +76,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -117,8 +111,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -153,9 +146,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -187,13 +178,10 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 34359738368 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 @@ -227,9 +215,7 @@ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) @@ -240,11 +226,10 @@ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64) ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[UADDO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO2]], [[UADDO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -125,20 +125,18 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C3]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; 
CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 @@ -166,9 +164,7 @@ ; CHECK-NEXT: %b:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), %a(s32), %b ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[DEF]], [[DEF]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %a:_(s32) = COPY $w0 @@ -311,17 +307,17 @@ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT3]], [[ANYEXT4]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT5]], [[ANYEXT6]] + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT7]], [[ANYEXT8]] + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>) + ; 
CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]] ; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>) ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>) ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) @@ -359,22 +355,20 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[C]](p0), [[C]](p0) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>) ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[BUILD_VECTOR]](<2 x p0>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C1]](s64) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND1]], [[AND2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[OR]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -7,8 +7,8 @@ ; CHECK-LABEL: name: test_shift ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 @@ -109,13 +109,9 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[SHL]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[UV1]], [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[SHL]], [[C1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV1]], [[SELECT1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -145,13 +141,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -183,13 +175,9 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s64) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; CHECK-NEXT: $q0 = COPY [[MV]](s128) %0:_(s128) = COPY $q0 @@ -236,7 +224,9 @@ liveins: $w0 ; CHECK-LABEL: name: shl_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[SHL]](s32) @@ -255,7 +245,9 @@ liveins: $w0 ; 
CHECK-LABEL: name: lshr_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32) @@ -274,7 +266,9 @@ liveins: $w0 ; CHECK-LABEL: name: ashr_cimm_32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: $w0 = COPY [[ASHR]](s32) @@ -496,11 +490,8 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB1]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT]] ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64) ; CHECK-NEXT: $w0 = COPY %d2(s32) %0:_(s64) = COPY $x0 @@ -529,7 +520,6 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C2]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s64), [[C1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[C3]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[COPY]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C3]] @@ -538,14 +528,11 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB2]](s64) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s32), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s32), [[MV]], [[SELECT]] ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s64), [[C3]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[SUB1]](s64) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C4]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[SHL1]], [[C1]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s32), [[SHL1]], [[C1]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SELECT1]], [[SELECT2]] ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C3]] ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[SUB]] @@ -555,14 +542,10 @@ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[C1]], [[SUB5]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL2]] ; 
CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[C1]], [[SUB4]](s64) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C4]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[OR2]], [[LSHR3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C4]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV2]], [[SELECT3]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s32), [[OR1]], [[SELECT4]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s32), [[MV]], [[SELECT5]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s32), [[OR2]], [[LSHR3]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s32), [[MV2]], [[SELECT3]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR1]], [[SELECT4]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT5]] ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT6]](s64) ; CHECK-NEXT: $w0 = COPY %d2(s32) %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir @@ -12,16 +12,16 @@ ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[INTTOPTR]](p0) ; CHECK-NEXT: $x0 = COPY [[PTRTOINT]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC4]], [[C1]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[TRUNC2]], [[TRUNC3]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) @@ -101,7 +101,9 @@ liveins: $x0, $x1 ; CHECK-LABEL: name: bitcast128 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[MV]](s128) @@ -122,7 +124,9 @@ liveins: $x0 ; CHECK-LABEL: name: testExtOfCopyOfTrunc - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -140,7 +144,9 @@ liveins: $x0 ; CHECK-LABEL: name: testExtOf2CopyOfTrunc - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir @@ -11,12 +11,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64) @@ -47,14 +46,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -89,8 +86,8 @@ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir @@ -11,9 +11,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]] - ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64) @@ -42,11 +40,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]] + ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -18,9 +18,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]] - ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]] ; CHECK-NEXT: $w0 = COPY %ssubsat(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 @@ -46,9 +44,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]] - ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]] ; CHECK-NEXT: $x0 = COPY %ssubsat(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 @@ -80,9 +76,7 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 @@ -117,8 +111,7 @@ ; CHECK-NEXT: 
[[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %copy_1:_(s32) = COPY $w0
@@ -153,9 +146,7 @@
 ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %copy_1:_(s32) = COPY $w0
@@ -187,13 +178,10 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]]
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64)
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64)
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
 ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 %copy_1:_(s64) = COPY $x0
@@ -227,9 +215,7 @@
 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]]
 ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]]
 ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32)
@@ -240,11 +226,10 @@
 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
 ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
+ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64)
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[USUBO]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[USUBO]]
 ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 %copy_1:_(s128) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
@@ -11,9 +11,7 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]]
 ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
 %0:_(s64) = COPY $x0
@@ -38,11 +36,8 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
- ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]]
+ ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]]
 ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
 ; CHECK-NEXT: $x2 = COPY [[USUBE2]](s64)
@@ -136,8 +131,8 @@
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
- ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: $d0 = COPY [[sub]](<2 x s32>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $d0 = COPY [[SUB]](<2 x s32>)
 ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:_(<2 x s32>) = COPY $d0
 %1:_(<2 x s32>) = COPY $d1
@@ -163,15 +158,15 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
- ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
 ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
- ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
 ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
- ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+ ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
 ; CHECK-NEXT: RET_ReallyLR implicit $b0
 %1:_(s8) = COPY $b0
 %2:_(s8) = COPY $b1
@@ -200,8 +195,8 @@
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
- ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: $d0 = COPY [[sub]](<4 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $d0 = COPY [[SUB]](<4 x s16>)
 ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:_(<4 x s16>) = COPY $d0
 %1:_(<4 x s16>) = COPY $d1
@@ -230,8 +225,8 @@
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x s8>) = COPY $d3
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY]](<8 x s8>), [[COPY1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY2]](<8 x s8>), [[COPY3]]
- ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: $d0 = COPY [[sub]](<8 x s8>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $d0 = COPY [[SUB]](<8 x s8>)
 ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:_(<8 x s8>) = COPY $d0
 %1:_(<8 x s8>) = COPY $d1
@@ -260,8 +255,8 @@
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s8>) = COPY $q3
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY2]](<16 x s8>), [[COPY3]]
- ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
- ; CHECK-NEXT: $q0 = COPY [[sub]](<16 x s8>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
+ ; CHECK-NEXT: $q0 = COPY [[SUB]](<16 x s8>)
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:_(<16 x s8>) = COPY $q0
 %1:_(<16 x s8>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
@@ -18,9 +18,7 @@
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[UADDO]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %0:_(s32) = COPY $w0
@@ -48,9 +46,7 @@
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[UADDO]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]]
 ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 %0:_(s64) = COPY $x0
@@ -83,9 +79,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
@@ -121,9 +115,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
@@ -162,9 +154,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]]
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[COPY2]], [[ADD]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY2]], [[ADD]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir
@@ -11,12 +11,11 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
- ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDE1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE3]](s32)
 ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64)
@@ -47,14 +46,12 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
- ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE3]], [[C]]
- ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND2]]
+ ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]]
+ ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE3]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE5]](s32)
 ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64)
@@ -85,13 +82,13 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]]
 ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[AND]], [[AND1]], [[AND2]]
 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UADDE]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir
@@ -11,9 +11,7 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE1]](s32)
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
@@ -42,11 +40,8 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
- ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]]
+ ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE3]](s32)
 ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
@@ -74,8 +69,8 @@
 ; CHECK-LABEL: name: test_scalar_uaddo_small
 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir
@@ -18,9 +18,7 @@
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[USUBO]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %0:_(s32) = COPY $w0
@@ -48,9 +46,7 @@
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[USUBO]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]]
 ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 %0:_(s64) = COPY $x0
@@ -83,9 +79,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
@@ -121,9 +115,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
@@ -159,9 +151,7 @@
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]]
 ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 %2:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir
@@ -11,12 +11,11 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
- ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBE1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32)
 ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64)
@@ -47,14 +46,12 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
- ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]]
- ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND2]]
+ ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]]
+ ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE3]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE5]](s32)
 ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64)
@@ -85,13 +82,13 @@
 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]]
 ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[AND]], [[AND1]], [[AND2]]
 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[USUBE]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir
@@ -11,9 +11,7 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE1]](s32)
 ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
@@ -42,11 +40,8 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
- ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
- ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]]
+ ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]]
 ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32)
 ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
 ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
@@ -74,8 +69,8 @@
 ; CHECK-LABEL: name: test_scalar_usubo_small
 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
@@ -17,8 +17,8 @@
 ; CHECK-NEXT: successors: %bb.2(0x80000000)
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI %33(s16), %bb.2, [[DEF]](s16), %bb.0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 46
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND]](s32), [[C1]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
@@ -10,8 +10,7 @@
 ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ogt), [[COPY]](s32), [[COPY1]]
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C]]
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
- ; CHECK: $w0 = COPY [[AND1]](s32)
+ ; CHECK: $w0 = COPY [[AND]](s32)
 %0:_(s32) = COPY $w0
 %1:_(s32) = COPY $w1
 %2:_(s1) = G_FCMP floatpred(ogt), %0(s32), %1
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -1246,7 +1246,7 @@
 ;
 ; CHECK-GI-LABEL: zext_v16i10_v16i64:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov s7, w0
+; CHECK-GI-NEXT: fmov s16, w0
 ; CHECK-GI-NEXT: fmov s17, w2
 ; CHECK-GI-NEXT: ldr s0, [sp]
 ; CHECK-GI-NEXT: fmov s18, w4
@@ -1257,33 +1257,33 @@
 ; CHECK-GI-NEXT: ldr s4, [sp, #32]
 ; CHECK-GI-NEXT: ldr s5, [sp, #40]
 ; CHECK-GI-NEXT: ldr s6, [sp, #48]
-; CHECK-GI-NEXT: ldr s16, [sp, #56]
-; CHECK-GI-NEXT: mov v7.s[1], w1
+; CHECK-GI-NEXT: ldr s7, [sp, #56]
+; CHECK-GI-NEXT: mov v16.s[1], w1
 ; CHECK-GI-NEXT: mov v17.s[1], w3
 ; CHECK-GI-NEXT: mov v18.s[1], w5
 ; CHECK-GI-NEXT: mov v19.s[1], w7
 ; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT: mov v2.s[1], v3.s[0]
 ; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
-; CHECK-GI-NEXT: mov v6.s[1], v16.s[0]
+; CHECK-GI-NEXT: mov v6.s[1], v7.s[0]
 ; CHECK-GI-NEXT: adrp x8, .LCPI54_0
-; CHECK-GI-NEXT: ldr q16, [x8, :lo12:.LCPI54_0]
-; CHECK-GI-NEXT: ushll v1.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v16.2s, #0
 ; CHECK-GI-NEXT: ushll v3.2d, v17.2s, #0
 ; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0
 ; CHECK-GI-NEXT: ushll v7.2d, v19.2s, #0
-; CHECK-GI-NEXT: ushll v17.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
 ; CHECK-GI-NEXT: ushll v18.2d, v2.2s, #0
 ; CHECK-GI-NEXT: ushll v19.2d, v4.2s, #0
 ; CHECK-GI-NEXT: ushll v20.2d, v6.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v1.16b, v16.16b
-; CHECK-GI-NEXT: and v1.16b, v3.16b, v16.16b
-; CHECK-GI-NEXT: and v2.16b, v5.16b, v16.16b
-; CHECK-GI-NEXT: and v3.16b, v7.16b, v16.16b
-; CHECK-GI-NEXT: and v4.16b, v17.16b, v16.16b
-; CHECK-GI-NEXT: and v5.16b, v18.16b, v16.16b
-; CHECK-GI-NEXT: and v6.16b, v19.16b, v16.16b
-; CHECK-GI-NEXT: and v7.16b, v20.16b, v16.16b
+; CHECK-GI-NEXT: ldr q17, [x8, :lo12:.LCPI54_0]
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v17.16b
+; CHECK-GI-NEXT: and v1.16b, v3.16b, v17.16b
+; CHECK-GI-NEXT: and v2.16b, v5.16b, v17.16b
+; CHECK-GI-NEXT: and v3.16b, v7.16b, v17.16b
+; CHECK-GI-NEXT: and v4.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT: and v5.16b, v18.16b, v17.16b
+; CHECK-GI-NEXT: and v6.16b, v19.16b, v17.16b
+; CHECK-GI-NEXT: and v7.16b, v20.16b, v17.16b
 ; CHECK-GI-NEXT: ret
 entry:
 %c = zext <16 x i10> %a to <16 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir
@@ -50,8 +50,8 @@
 ; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone
 ; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone
 ; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1)
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]]
 ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir
@@ -139,8 +139,7 @@
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32)
 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -549,11 +548,9 @@
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
- ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[LSHR]](s32)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND1]](s32), [[LSHR1]](s32)
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64)
 %0:_(<2 x s16>) = COPY $vgpr0
 %1:_(<2 x s16>) = COPY $vgpr1
@@ -1305,8 +1302,8 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
 ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
@@ -1453,8 +1450,8 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -31,10 +31,10 @@
 ; CHECK: liveins: $vgpr0_vgpr1
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]]
 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64)
@@ -55,13 +55,13 @@
 ; CHECK: liveins: $vgpr0_vgpr1
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]]
 ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -134,8 +134,8 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
 ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
 %0:_(s32) = COPY $vgpr0
@@ -158,8 +158,8 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]]
 ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
 %0:_(s32) = COPY $vgpr0
@@ -181,8 +181,8 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
 ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
 %0:_(p1) = COPY $vgpr0_vgpr1
@@ -209,12 +209,12 @@
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -241,12 +241,12 @@
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]]
 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1
 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -269,12 +269,12 @@
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -1327,9 +1327,9 @@
 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
 ; GFX9-NEXT: v_not_b32_e32 v2, v2
-; GFX9-NEXT: s_mov_b32 s5, 1
+; GFX9-NEXT: s_mov_b32 s4, 1
 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
-; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0
 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v10
 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
@@ -1338,7 +1338,7 @@
 ; GFX9-NEXT: v_not_b32_e32 v5, v5
 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
-; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xff
 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3
 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4
@@ -1360,9 +1360,9 @@
 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1
 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 8
-; GFX9-NEXT: s_movk_i32 s4, 0xff
+; GFX9-NEXT: s_movk_i32 s5, 0xff
 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1
+; GFX9-NEXT: v_and_or_b32 v1, v2, s5, v1
 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
@@ -1807,48 +1807,47 @@
 define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
 ; GFX6-LABEL: s_fshl_v2i24:
 ; GFX6: ; %bb.0:
-; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16
 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24
 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff
 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
+; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8
 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
+; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX6-NEXT: s_or_b32 s0, s9, s0
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8
 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
-; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT: s_or_b32 s0, s0, s6
 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
 ; GFX6-NEXT: s_or_b32 s1, s7, s1
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0
 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
+; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT: s_or_b32 s1, s1, s6
 ; GFX6-NEXT: s_lshr_b32 s6, s2, 16
 ; GFX6-NEXT: s_lshr_b32 s7, s2, 24
 ; GFX6-NEXT: s_and_b32 s9, s2, 0xff
 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
-; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
 ; GFX6-NEXT: s_or_b32 s2, s9, s2
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8
 ; GFX6-NEXT: s_lshr_b32 s8, s3, 8
 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
+; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2
 ; GFX6-NEXT: s_or_b32 s2, s2, s6
 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8
 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
-; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX6-NEXT: s_or_b32 s3, s7, s3
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
@@ -1858,78 +1857,77 @@
 ; GFX6-NEXT: s_lshr_b32 s7, s4, 24
 ; GFX6-NEXT: s_and_b32 s9, s4, 0xff
 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
+; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3
 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8
 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
 ; GFX6-NEXT: s_or_b32 s4, s9, s4
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
 ; GFX6-NEXT: s_or_b32 s4, s4, s6
-; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
-; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT: s_lshr_b32 s8, s5, 8
-; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff
-; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1
+; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2
 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1
+; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT: s_or_b32 s5, s7, s5
 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
+; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1
 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
 ; GFX6-NEXT: s_or_b32 s5, s5, s6
-; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
-; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0
-; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
+; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
+; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
 ; GFX6-NEXT: s_lshr_b32 s0, s2, 1
 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
-; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
-; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
-; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
+; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1
 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
+; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
-; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8
-; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
-; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
+; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8
+; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v1
+; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8
+; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX6-NEXT: v_readfirstlane_b32 s0, v1
+; GFX6-NEXT: v_readfirstlane_b32 s1, v0
 ; GFX6-NEXT: ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_fshl_v2i24:
 ; GFX8: ; %bb.0:
@@ -1942,9 +1940,7 @@
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
 ; GFX8-NEXT: s_or_b32 s0, s0, s6
 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
-; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8
 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
@@ -1952,24 +1948,24 @@
 ; GFX8-NEXT: s_or_b32 s0, s0, s6
 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
+; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
 ; GFX8-NEXT: s_or_b32 s1, s8, s1
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
-; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT: s_or_b32 s1, s1, s6
 ; GFX8-NEXT: s_lshr_b32 s6, s2, 8
-; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
 ; GFX8-NEXT: s_lshr_b32 s8, s2, 24
 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
+; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0
 ; GFX8-NEXT: s_or_b32 s2, s2, s6
 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
-; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
+; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
 ; GFX8-NEXT: s_lshr_b32 s9, s3, 8
 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
@@ -1977,11 +1973,12 @@
 ; GFX8-NEXT: s_or_b32 s2, s2, s6
 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8
 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
+; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8
 ; GFX8-NEXT: s_or_b32 s3, s8, s3
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2
 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
-; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT: s_or_b32 s3, s3, s6
 ; GFX8-NEXT: s_lshr_b32 s6, s4, 8
 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
@@ -1989,212 +1986,207 @@
 ; GFX8-NEXT: s_lshr_b32 s8, s4, 24
 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
+; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3
 ; GFX8-NEXT: s_or_b32 s4, s4, s6
 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
 ; GFX8-NEXT: s_or_b32 s4, s4, s6
-; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
-; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
+; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1
+; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX8-NEXT: s_lshr_b32 s9, s5, 8
-; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff
-; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
+; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2
 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8
-; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1
+; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX8-NEXT: s_or_b32 s5, s8, s5
 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
-; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
+; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1
 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
 ; GFX8-NEXT: s_or_b32 s5, s5, s6
-; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
-; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
+; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
+; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
-; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
-; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
-; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
+; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1
 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1
+; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1
 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mov_b32_e32 v2, 8
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
 ; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
-; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1
+; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
-; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
-; GFX8-NEXT: v_readfirstlane_b32 s0, v0
-; GFX8-NEXT: v_readfirstlane_b32 s1, v1
+; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX8-NEXT: v_readfirstlane_b32 s0, v1
+; GFX8-NEXT: v_readfirstlane_b32 s1, v0
 ; GFX8-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_fshl_v2i24:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8
 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16
-; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24
 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
 ; GFX9-NEXT: s_or_b32 s0, s0, s7
 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8
 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1
 ; GFX9-NEXT: s_or_b32 s0, s0, s7
 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
+; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
 ; GFX9-NEXT: s_or_b32 s1, s10, s1
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX9-NEXT: s_or_b32 s1, s1, s7
 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8
-; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
 ; GFX9-NEXT: s_lshr_b32 s9, s2, 16
 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24
 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
+; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0
 ; GFX9-NEXT: s_or_b32 s2, s2, s7
 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
+; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
-; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8
 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff
-; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
 ; GFX9-NEXT: s_or_b32 s2, s2, s7
 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8
 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8
 ; GFX9-NEXT: s_or_b32 s3, s10, s3
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2
 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX9-NEXT: s_or_b32 s3, s3, s7
 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8
-; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24
 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
+; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3
 ; GFX9-NEXT: s_or_b32 s4, s4, s7
 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
-; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
+; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2
 ; GFX9-NEXT: s_or_b32 s4, s4, s7
-; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0
+; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
+; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1
 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8
 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff
-; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
+; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
 ; GFX9-NEXT: s_or_b32 s5, s10, s5
 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
+; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX9-NEXT: s_or_b32 s5, s5, s7
-; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
-; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
-; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
-; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
-; GFX9-NEXT: s_lshr_b32 s2, s2, 1
-; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2
-; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1
-; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v2
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
+; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1
 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1
 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1
-; GFX9-NEXT: s_lshr_b32 s0, s3, 1
+; GFX9-NEXT: s_lshr_b32 s2, s2, 1
 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2
+; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
+; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
+; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
+; GFX9-NEXT: s_lshr_b32 s0, s3, 1
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0
 ; GFX9-NEXT: s_mov_b32 s6, 8
-; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2
+; GFX9-NEXT: v_lshl_or_b32 v0, s1, v0, v2
 ; GFX9-NEXT: s_mov_b32 s8, 16
 ; GFX9-NEXT: s_movk_i32 s0, 0xff
-; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
-; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1
-; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2
-; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
+; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
+; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0
+; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2
+; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
-; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3
-; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8
-; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8
-; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2
-; GFX9-NEXT: v_readfirstlane_b32 s0, v0
-; GFX9-NEXT: v_readfirstlane_b32 s1, v1
+; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3
+; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8
+; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
+; GFX9-NEXT: v_readfirstlane_b32 s1, v0
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_fshl_v2i24:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8
 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
-; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24
+; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
 ; GFX10-NEXT: s_lshl_b32 s6, s6, 8
 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff
@@ -2202,244 +2194,251 @@
 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s7
 ; GFX10-NEXT: s_lshr_b32 s7, s4, 8
 ; GFX10-NEXT: s_lshr_b32 s10, s4, 16
-; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff
 ; GFX10-NEXT: s_lshr_b32 s11, s4, 24
+; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0
+; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
 ; GFX10-NEXT: s_lshl_b32 s7, s7, 8
 ; GFX10-NEXT: s_lshr_b32 s12, s5, 8
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
 ; GFX10-NEXT: s_or_b32 s4, s4, s7
-; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
-; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
 ; GFX10-NEXT: s_and_b32 s7, s10, 0xff
 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1
+; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0
 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff
 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8
-; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
 ; GFX10-NEXT: s_or_b32 s4, s4, s7
 ; GFX10-NEXT: s_and_b32 s7, s12, 0xff
+; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2
+; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3
 ; GFX10-NEXT: s_or_b32 s5, s11, s5
 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
-; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
-; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
-; GFX10-NEXT: s_or_b32 s5, s5, s7
 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8
+; GFX10-NEXT: s_or_b32 s5, s5, s7
+; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3
 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
-; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
-; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX10-NEXT: s_lshl_b32 s1, s1, 8
 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff
+; GFX10-NEXT: s_lshl_b32 s1, s1, 8
+; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1
+; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0
 ; GFX10-NEXT: s_or_b32 s1, s8, s1
 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8
 ; GFX10-NEXT: s_lshr_b32 s9, s2, 16
 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff
-; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
 ; GFX10-NEXT: s_lshr_b32 s10, s2, 24
 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
+; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
+; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8
 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
 ; GFX10-NEXT: s_or_b32 s2, s2, s8
 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
-; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
-; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX10-NEXT: s_lshl_b32 s6, s6, 16
+; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0
 ; GFX10-NEXT: s_lshr_b32 s4, s3, 8
 ; GFX10-NEXT: s_and_b32 s5, s9, 0xff
 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8
 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16
 ; GFX10-NEXT: s_or_b32 s3, s10, s3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
+; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
 ; GFX10-NEXT: s_or_b32 s2, s2, s5
 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_lshr_b32 s2, s2, 1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: s_lshr_b32 s2, s2, 1 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: s_or_b32 s1, s1, s7 ; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX10-NEXT: s_lshr_b32 s2, s3, 1 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshl_or_b32 v0, s1, v0, v3 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_lshr_b32 s10, s4, 24 +; GFX11-NEXT: s_and_b32 s7, s7, 0xff ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_and_b32 s6, s7, 0xff +; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshl_b32 s6, 
s6, 16 -; GFX11-NEXT: s_lshr_b32 s7, s4, 16 +; GFX11-NEXT: s_lshr_b32 s9, s4, 16 ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_lshr_b32 s6, s4, 8 -; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshr_b32 s10, s4, 24 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_or_b32 s4, s4, s6 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 +; GFX11-NEXT: s_or_b32 s4, s4, s6 +; GFX11-NEXT: s_and_b32 s6, s9, 0xff +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff ; GFX11-NEXT: s_or_b32 s4, s4, s6 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8 ; GFX11-NEXT: s_and_b32 s6, s11, 0xff +; GFX11-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX11-NEXT: s_or_b32 s5, s10, s5 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_lshr_b32 s9, s1, 8 -; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: s_lshr_b32 s7, s1, 8 ; GFX11-NEXT: s_or_b32 s5, s5, s6 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_and_b32 s7, s9, 0xff -; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v3 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 +; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_lshr_b32 s7, s2, 8 -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 16 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_lshr_b32 s9, s3, 8 -; GFX11-NEXT: s_lshl_b32 s7, s7, 8 -; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX11-NEXT: s_lshr_b32 s4, s2, 24 +; GFX11-NEXT: s_lshr_b32 s9, s2, 24 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_lshl_b32 s7, s7, 8 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 ; GFX11-NEXT: s_or_b32 s2, s2, s7 -; GFX11-NEXT: s_or_b32 s3, s4, s3 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX11-NEXT: s_and_b32 s5, s8, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: 
v_subrev_nc_u32_e32 v2, 24, v1 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff ; GFX11-NEXT: s_or_b32 s2, s2, s5 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_lshr_b32 s2, s2, 1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX11-NEXT: s_lshr_b32 s4, s3, 8 +; GFX11-NEXT: s_and_b32 s3, s3, 0xff +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_or_b32 s3, s9, s3 +; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: s_lshr_b32 s3, s3, 1 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_and_b32 v0, 0xffffff, v0 -; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s3, s4, 16 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX11-NEXT: s_or_b32 s2, s2, s3 -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX11-NEXT: s_lshr_b32 s0, s2, 1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v3, s0 -; GFX11-NEXT: s_or_b32 s0, s1, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 +; GFX11-NEXT: s_lshl_b32 s2, s6, 16 +; GFX11-NEXT: v_lshrrev_b32_e64 v3, v3, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v1, v2 
+; GFX11-NEXT: s_or_b32 s0, s1, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 +; GFX11-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2455,37 +2454,35 @@ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; 
GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX6-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 @@ -2509,37 +2506,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 23, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX8-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 @@ -2563,37 +2558,35 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: 
v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_mul_hi_u32 v6, v5, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 @@ -2614,29 +2607,27 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, 
v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2665,64 +2656,63 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX11-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX11-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_mul_f32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | 
instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 -; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -1371,48 +1371,48 @@ ; GFX10-LABEL: v_fshr_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_not_b32_e32 v5, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_not_b32_e32 v8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX10-NEXT: v_not_b32_e32 v12, v7 -; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 -; GFX10-NEXT: v_not_b32_e32 v13, v10 +; GFX10-NEXT: v_not_b32_e32 v10, v7 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 +; GFX10-NEXT: v_lshlrev_b16 v0, v5, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v10 +; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 +; GFX10-NEXT: v_not_b32_e32 v13, v11 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b16 v3, v12, v3 -; GFX10-NEXT: v_not_b32_e32 v12, v11 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 -; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1 +; 
GFX10-NEXT: v_and_b32_e32 v10, 0xff, v1 +; GFX10-NEXT: v_lshlrev_b16 v3, v5, v3 +; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v8 +; GFX10-NEXT: v_not_b32_e32 v8, v12 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 -; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX10-NEXT: v_lshrrev_b16 v6, v7, v6 +; GFX10-NEXT: v_lshrrev_b16 v5, v7, v5 ; GFX10-NEXT: v_lshlrev_b16 v4, v13, v4 -; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 -; GFX10-NEXT: v_lshlrev_b16 v5, v12, v5 -; GFX10-NEXT: v_lshrrev_b16 v7, v11, v9 -; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 -; GFX10-NEXT: v_mov_b32_e32 v6, 8 +; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1 +; GFX10-NEXT: v_lshlrev_b16 v6, v8, v6 +; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v5 +; GFX10-NEXT: v_mov_b32_e32 v5, 8 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 -; GFX10-NEXT: v_or_b32_e32 v4, v5, v7 +; GFX10-NEXT: v_or_b32_e32 v4, v6, v7 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2 @@ -1820,14 +1820,13 @@ ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_or_b32 s0, s9, s0 ; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_and_b32 s7, s8, 0xff @@ -1835,19 +1834,19 @@ ; GFX6-NEXT: s_lshr_b32 s9, s2, 24 ; GFX6-NEXT: s_and_b32 s11, s2, 0xff ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff ; GFX6-NEXT: s_or_b32 s2, s11, s2 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s10, s3, 8 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX6-NEXT: s_or_b32 s2, s2, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_or_b32 s3, s9, s3 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 @@ -1857,103 +1856,100 @@ ; GFX6-NEXT: s_lshr_b32 s9, s4, 24 ; GFX6-NEXT: s_and_b32 s11, s4, 0xff ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; 
GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX6-NEXT: s_or_b32 s4, s11, s4 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s10, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: s_or_b32 s5, s9, s5 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX6-NEXT: s_or_b32 s5, s5, s8 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 ; GFX6-NEXT: s_lshl_b32 s4, s6, 17 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 ; GFX6-NEXT: s_or_b32 s0, s4, s0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_lshr_b32_e32 v1, s2, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 ; GFX6-NEXT: s_lshl_b32 s0, s7, 17 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, 
v2 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 -; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshr_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshr_v2i24: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff ; GFX8-NEXT: s_or_b32 s1, s8, s1 ; GFX8-NEXT: s_lshr_b32 s8, s2, 8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 @@ -1965,11 +1961,11 @@ ; GFX8-NEXT: s_lshr_b32 s10, s2, 24 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX8-NEXT: s_lshr_b32 s11, s3, 8 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 @@ -1977,11 +1973,12 @@ ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX8-NEXT: s_or_b32 s3, s10, s3 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_or_b32 s3, s3, s8 ; GFX8-NEXT: s_lshr_b32 s8, s4, 8 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff @@ -1989,101 +1986,95 @@ ; GFX8-NEXT: s_lshr_b32 s10, s4, 24 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_or_b32 s4, s4, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 
s8, s8, 16 ; GFX8-NEXT: s_or_b32 s4, s4, s8 -; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s11, s5, 8 -; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff -; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: s_or_b32 s5, s10, s5 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX8-NEXT: s_or_b32 s5, s5, s8 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX8-NEXT: s_lshl_b32 s4, s6, 17 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 ; GFX8-NEXT: s_or_b32 s0, s4, s0 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 ; GFX8-NEXT: s_lshl_b32 s0, s7, 17 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 ; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: 
v_lshrrev_b32_e64 v0, v0, s3 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 16 -; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 +; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshr_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: s_lshr_b32 s10, s2, 8 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 @@ -2095,12 +2086,11 @@ ; GFX9-NEXT: s_lshr_b32 s12, s2, 24 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff -; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX9-NEXT: s_lshr_b32 s13, s3, 8 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 @@ -2108,101 +2098,103 @@ ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX9-NEXT: s_or_b32 s3, s12, s3 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s3, s3, s10 ; GFX9-NEXT: s_lshr_b32 s10, s4, 8 
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_and_b32 s10, s10, 0xff ; GFX9-NEXT: s_lshr_b32 s11, s4, 16 ; GFX9-NEXT: s_lshr_b32 s12, s4, 24 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: s_or_b32 s4, s4, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 ; GFX9-NEXT: s_or_b32 s4, s4, s10 -; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: s_lshr_b32 s13, s5, 8 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff -; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff ; GFX9-NEXT: s_or_b32 s5, s12, s5 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX9-NEXT: s_or_b32 s5, s5, s10 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: s_or_b32 s5, s5, s10 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 ; GFX9-NEXT: s_lshl_b32 s4, s7, 17 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX9-NEXT: s_or_b32 s0, s4, s0 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX9-NEXT: s_and_b32 s9, 0xffff, s9 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 ; GFX9-NEXT: s_lshl_b32 s0, s9, 17 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX9-NEXT: s_or_b32 s0, s0, s1 ; 
GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 +; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s3 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX9-NEXT: s_mov_b32 s8, 16 ; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 -; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshr_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff ; GFX10-NEXT: s_or_b32 s1, s8, s1 @@ -2210,123 +2202,121 @@ ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s9, s4, 16 +; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: s_lshr_b32 s10, s4, 24 -; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshr_b32 s11, s5, 8 +; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s9, 0xff -; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX10-NEXT: s_lshr_b32 s11, s5, 8 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s11, 0xff -; 
GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: s_or_b32 s5, s10, s5 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX10-NEXT: s_or_b32 s5, s5, s8 ; GFX10-NEXT: s_lshr_b32 s8, s2, 16 -; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX10-NEXT: s_and_b32 s9, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s10, s2, 24 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_lshr_b32 s11, s3, 8 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: s_lshl_b32 s9, s9, 8 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff -; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff ; GFX10-NEXT: s_or_b32 s2, s2, s9 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_and_b32 s5, s11, 0xff +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_or_b32 s3, s10, s3 -; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX10-NEXT: s_and_b32 s5, s11, 0xff ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 +; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 -; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: s_or_b32 s2, s2, s4 +; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_or_b32 s3, s3, s5 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_lshl_b32 s4, s6, 17 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_or_b32 s0, s4, s0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: 
v_sub_nc_u32_e32 v2, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 ; GFX10-NEXT: s_lshl_b32 s2, s7, 17 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v3, v1 ; GFX10-NEXT: s_or_b32 s0, s2, s1 -; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshr_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8 @@ -2334,122 +2324,124 @@ ; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_and_b32 s7, s9, 0xff ; GFX11-NEXT: s_lshr_b32 s9, s4, 8 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshr_b32 s10, s4, 16 ; GFX11-NEXT: s_and_b32 s9, s9, 0xff +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s11, s4, 0xff -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s9, s9, 8 ; GFX11-NEXT: s_and_b32 s10, s10, 0xff ; GFX11-NEXT: s_or_b32 s9, s11, s9 -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s9 ; GFX11-NEXT: s_lshl_b32 s10, s10, 16 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 -; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshr_b32 s4, s4, 24 +; GFX11-NEXT: s_or_b32 s9, s9, s10 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8 ; GFX11-NEXT: s_and_b32 s10, s11, 0xff ; 
GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s10 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_lshr_b32 s10, s2, 16 -; GFX11-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_lshr_b32 s5, s2, 24 -; GFX11-NEXT: s_or_b32 s1, s8, s1 +; GFX11-NEXT: s_lshr_b32 s10, s2, 16 ; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 8 -; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshr_b32 s5, s2, 24 +; GFX11-NEXT: v_mul_hi_u32 v1, s9, v1 +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX11-NEXT: s_and_b32 s8, s8, 0xff -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_lshl_b32 s8, s8, 8 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX11-NEXT: s_or_b32 s2, s2, s8 ; GFX11-NEXT: s_and_b32 s8, s10, 0xff -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s9, v0 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v0, v3 +; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_or_b32 s2, s2, s8 +; GFX11-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s9, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX11-NEXT: s_lshr_b32 s9, s3, 8 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: s_or_b32 s2, s2, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX11-NEXT: s_or_b32 s3, s5, s3 -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s4, s4, 16 ; GFX11-NEXT: s_lshl_b32 s5, s6, 17 -; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX11-NEXT: s_or_b32 s0, s5, s0 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX11-NEXT: s_lshl_b32 s1, s1, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 +; GFX11-NEXT: s_and_b32 s4, s9, 0xff +; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX11-NEXT: s_or_b32 s2, s3, s4 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: s_or_b32 s2, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 +; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 ; GFX11-NEXT: s_lshl_b32 s0, s7, 17 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_or_b32 s0, s0, s1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX11-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2465,42 +2457,40 @@ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2521,42 +2511,40 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 ; 
GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2577,42 +2565,40 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; 
GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, v7, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc @@ -2630,31 +2616,29 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2683,40 +2667,37 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1 -; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -2729,20 +2710,20 @@ ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -25,12 +25,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -63,12 +65,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -101,12 +105,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -139,12 +145,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; 
GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p3_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -177,12 +185,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p5_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -216,12 +226,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -260,12 +272,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -296,14 +310,6 @@ bb.0: liveins: $vgpr0 - ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits - ; GFX6: liveins: $vgpr0 - ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -312,6 +318,7 @@ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -353,12 +360,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -397,6 +406,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -405,6 +415,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -443,6 +454,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -451,6 +463,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -489,12 +502,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -533,6 +548,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -541,6 +557,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -579,6 +596,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -587,6 +605,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -625,6 +644,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -633,6 +653,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: 
$vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -671,6 +692,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -679,6 +701,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -717,6 +740,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -725,6 +749,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -763,6 +788,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -771,6 +797,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -809,6 +836,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; 
GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -817,6 +845,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -849,9 +878,11 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -878,9 +909,11 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -907,9 +940,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -937,10 +972,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), 
addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -969,9 +1006,11 @@ ; GFX6-LABEL: name: load_private_s32_from_fi ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_fi ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] @@ -998,9 +1037,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1030,9 +1071,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1066,12 +1109,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 
[[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -1102,10 +1147,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -18,6 +18,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: test_add_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: test_add_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_add_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -66,6 +69,7 @@ ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + 
; ; GFX9-LABEL: name: test_add_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -98,6 +102,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_add_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -108,6 +113,7 @@ ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_add_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +158,7 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -174,6 +181,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -208,6 +216,7 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; ; GFX8-LABEL: name: test_add_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} @@ -227,6 +236,7 @@ ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ADD]](s16), implicit [[ADD1]](s16), implicit [[ADD2]](s16) + ; ; GFX9-LABEL: name: test_add_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -315,6 +325,7 @@ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: test_add_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -355,6 +366,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_add_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -389,6 +401,7 @@ ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: test_add_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -400,6 +413,7 @@ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -432,6 +446,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[ADD]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_add_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -440,10 +455,11 @@ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_add_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -452,8 +468,8 @@ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -478,6 +494,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: test_add_s24 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -485,6 +502,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: test_add_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -517,6 +535,7 @@ ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: test_add_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -528,6 +547,7 @@ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_add_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -566,6 +586,7 @@ ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; ; GFX8-LABEL: name: test_add_s96 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -578,6 +599,7 @@ ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; ; GFX9-LABEL: name: test_add_s96 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -469,10 +469,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND2]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -531,10 +530,9 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -543,38 +541,34 @@ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND5]](<4 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>) ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], 
[[C]](s32) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -620,8 +620,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -631,13 +631,12 @@ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) ; 
CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -685,8 +684,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -85,6 +89,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -119,6 +125,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; VI-LABEL: name: test_ashr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 
+135,7 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64) + ; ; GFX9PLUS-LABEL: name: test_ashr_s64_s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -158,6 +166,7 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -168,6 +177,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -202,6 +212,7 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -212,6 +223,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -247,26 +259,28 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s16_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) @@ -296,13 +310,14 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 @@ -311,13 +326,14 @@ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_i8_i8 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) @@ -349,13 +365,14 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 @@ -364,13 +381,14 @@ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s7_s7 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) @@ -402,6 +420,7 @@ ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s24_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -412,6 +431,7 @@ ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s24_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -446,6 +466,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; VI-LABEL: name: test_ashr_s32_s24 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -455,6 +476,7 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32) ; 
VI-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX9PLUS-LABEL: name: test_ashr_s32_s24 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -488,6 +510,7 @@ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_ashr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -499,6 +522,7 @@ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s32_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -534,6 +558,7 @@ ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_ashr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -546,6 +571,7 @@ ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s32_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -581,6 +607,7 @@ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_ashr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -592,6 +619,7 @@ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s64_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -629,6 +657,7 @@ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -643,6 +672,7 @@ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9PLUS-NEXT: {{ $}} @@ -686,15 +716,15 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], 
[[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_ashr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -717,6 +747,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -756,6 +787,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_ashr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -777,6 +809,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -823,34 +856,33 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[AND3]], [[SHL]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -890,13 +922,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -967,27 +999,26 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR2]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = 
G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32) ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[AND3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[LSHR3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1028,6 +1059,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -1056,13 +1088,13 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1075,18 
+1107,19 @@ ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1099,18 +1132,19 @@ ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s128 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) @@ -1160,6 +1194,7 @@ ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s132 ; VI: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1184,6 +1219,7 @@ ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s132 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1225,11 +1261,13 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_0 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1261,6 +1299,7 @@ ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1274,6 +1313,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_23 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1312,6 +1352,7 @@ ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1325,6 +1366,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_31 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1362,6 +1404,7 @@ ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1374,6 +1417,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1411,6 +1455,7 @@ ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1424,6 +1469,7 @@ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; 
GFX9PLUS-LABEL: name: test_ashr_s128_s32_33 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1459,6 +1505,7 @@ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_ashr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1469,6 +1516,7 @@ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_127 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9PLUS-NEXT: {{ $}} @@ -1496,71 +1544,71 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; SI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1576,76 +1624,77 @@ ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_ashr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) 
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR 
[[UV9]], [[C4]](s32) + ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; VI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1661,76 +1710,77 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; GFX9PLUS-LABEL: name: test_ashr_s256_s256 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: 
[[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; 
GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] ; GFX9PLUS-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; GFX9PLUS-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9PLUS-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) - ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) ; GFX9PLUS-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) + ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) ; GFX9PLUS-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; GFX9PLUS-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; GFX9PLUS-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1801,6 +1851,7 @@ ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; VI-LABEL: name: test_ashr_v2s128_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1843,6 +1894,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX9PLUS-LABEL: name: test_ashr_v2s128_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -1927,6 +1979,7 @@ ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = 
G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -1957,6 +2010,7 @@ ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -2030,6 +2084,7 @@ ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32_constant8 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -2059,6 +2114,7 @@ ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_constant8 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: {{ $}} @@ -2133,6 +2189,7 @@ ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; VI-LABEL: name: test_ashr_s65_s32_known_pow2 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; VI-NEXT: {{ $}} @@ -2164,6 +2221,7 @@ ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_known_pow2 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 ; GFX9PLUS-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -440,8 +440,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -529,8 +529,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -968,8 +968,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -1645,8 +1645,8 @@ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] @@ -2018,8 +2018,8 @@ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] @@ -2285,60 +2285,54 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] ; 
CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]] + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR11]](s16) ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL7]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL8]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR15]](s16) ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL9]] + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[LSHR12]], [[SHL9]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR16]](s16) ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL10]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR17]](s16) ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL11]] + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[LSHR13]], [[SHL11]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) 
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir @@ -98,11 +98,8 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[LSHR]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -13,14 +13,13 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX8-LABEL: name: bswap_s8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -59,6 +58,7 @@ ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: bswap_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -85,14 +85,13 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], 
[[C]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX8-LABEL: name: bswap_s24 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -120,6 +119,7 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[BSWAP]](s32) + ; ; GFX8-LABEL: name: bswap_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -157,8 +157,7 @@ ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -167,6 +166,7 @@ ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX8-LABEL: name: bswap_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -221,6 +221,7 @@ ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; ; GFX8-LABEL: name: bswap_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} @@ -294,24 +295,22 @@ ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY7]](s32) ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -326,6 +325,7 @@ ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX7-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: bswap_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -355,6 +355,7 @@ ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: bswap_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -384,6 +385,7 @@ ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: bswap_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -419,6 +421,7 @@ ; GFX7-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: bswap_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -156,14 +156,13 @@ ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[LSHR]](s32) ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -214,8 +213,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[CTLZ_ZERO_UNDEF]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ_ZERO_UNDEF %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -72,9 +72,7 @@ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTLZ %0 %2:_(s32) = G_ZEXT %1 @@ -174,15 +172,14 @@ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32) ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -237,8 +234,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UMIN]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -14,8 +14,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(s8) = G_CTPOP %1 @@ -36,8 +35,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s9) = G_TRUNC %0 %2:_(s9) = G_CTPOP %1 @@ -108,9 +106,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[CTPOP]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTPOP %0 %2:_(s32) = G_ZEXT %1 @@ -131,8 +127,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTPOP %1 @@ -197,13 +192,10 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[LSHR]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -226,8 +218,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTPOP %1 @@ -267,12 +258,12 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) @@ -295,12 +286,12 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -72,9 +72,7 @@ ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTTZ %0 %2:_(s32) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -18,12 +18,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; VI-LABEL: name: test_fabs_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; GFX9-LABEL: name: test_fabs_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -47,12 +49,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; VI-LABEL: name: test_fabs_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; GFX9-LABEL: name: test_fabs_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -77,6 +81,7 @@ ; SI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fabs_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -85,6 +90,7 @@ ; VI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fabs_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -115,6 +121,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fabs_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -124,6 +131,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -154,6 +162,7 @@ ; SI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fabs_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -164,6 +173,7 @@ ; VI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -194,6 +204,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fabs_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -203,6 +214,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fabs_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -229,12 +241,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; VI-LABEL: name: test_fabs_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -268,19 +282,19 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x 
s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -298,19 +312,19 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -353,6 +367,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fabs_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -362,6 +377,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; VI-LABEL: name: test_fadd_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_fadd_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fadd_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -58,6 +61,7 @@ ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fadd_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -90,6 +94,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fadd_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -100,6 +105,7 @@ ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fadd_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -137,6 +143,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -148,6 +155,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -182,6 +190,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -193,6 +202,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -228,6 +238,7 @@ ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fadd_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -240,6 +251,7 @@ ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -275,6 +287,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fadd_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -286,6 +299,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; 
GFX9-LABEL: name: test_fadd_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -337,6 +351,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fadd_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -359,6 +374,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -425,13 +441,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fadd_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -471,13 +487,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -583,6 +599,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fadd_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -623,6 +640,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ 
$}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -25,6 +25,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -39,6 +40,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -79,6 +81,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -90,6 +93,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -124,6 +128,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -135,6 +140,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -172,6 +178,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -186,6 +193,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -226,6 +234,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -240,6 +249,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -282,6 +292,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -298,6 +309,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s32 ; GFX9: liveins: 
$vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -343,6 +355,7 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -358,6 +371,7 @@ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -394,14 +408,15 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -410,14 +425,15 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -426,8 +442,8 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]] ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32) @@ -463,6 +479,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -479,6 +496,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -527,6 +545,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] ; SI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], 
[[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>) + ; ; VI-LABEL: name: test_copysign_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -545,6 +564,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]] ; VI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_copysign_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -583,6 +603,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; VI-LABEL: name: test_copysign_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -596,6 +617,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]] ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -638,6 +660,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_copysign_v2s64_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -655,6 +678,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -707,6 +731,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_copysign_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -730,6 +755,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_copysign_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -787,6 +813,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] ; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; VI-LABEL: name: test_copysign_v2s32_v2s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -808,6 +835,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]] ; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_copysign_v2s32_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -852,6 +880,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s32_flagss ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -863,6 
+892,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s32_flagss ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -901,6 +931,7 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s16_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -916,6 +947,7 @@ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s16_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -961,6 +993,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s32_flags ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -977,6 +1010,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s32_flags ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -19,6 +19,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; SI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; VI-LABEL: name: test_fma_s32 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; VI-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-LABEL: name: test_fma_s32 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; ; VI-LABEL: name: test_fma_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; ; GFX9-LABEL: name: test_fma_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -100,6 +104,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fma_s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -112,6 +117,7 @@ ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fma_s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -155,6 +161,7 @@ ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) ; 
SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fma_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -168,6 +175,7 @@ ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fma_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -208,6 +216,7 @@ ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fma_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} @@ -222,6 +231,7 @@ ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fma_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} @@ -264,6 +274,7 @@ ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_fma_v4s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} @@ -279,6 +290,7 @@ ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_fma_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} @@ -320,6 +332,7 @@ ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fma_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; VI-NEXT: {{ $}} @@ -333,6 +346,7 @@ ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fma_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX9-NEXT: {{ $}} @@ -394,6 +408,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_fma_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -421,6 +436,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: liveins: $vgpr0, 
$vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -501,13 +517,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fma_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; VI-NEXT: {{ $}} @@ -555,13 +571,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: {{ $}} @@ -688,6 +704,7 @@ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fma_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -738,6 +755,7 @@ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -23,6 +23,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: 
{{ $}} @@ -32,6 +33,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -63,6 +65,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -70,6 +73,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -96,6 +100,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -103,6 +108,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -131,6 +137,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -168,6 +176,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -176,6 +185,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -238,6 +250,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: 
[[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; ; VI-LABEL: name: test_fmaxnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -247,6 +260,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -281,6 +295,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fmaxnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -293,6 +308,7 @@ ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -335,6 +351,7 @@ ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -350,6 +367,7 @@ ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -405,6 +423,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -431,6 +450,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -499,13 +519,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v3s16 ; 
VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -551,13 +571,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -667,6 +687,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -715,6 +736,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -758,6 +780,7 @@ ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -771,6 +794,7 @@ ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -812,6 +836,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -823,6 +848,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -864,6 +890,7 @@ ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], 
[[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -877,6 +904,7 @@ ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -918,6 +946,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -929,6 +958,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -965,6 +995,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -973,6 +1004,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1020,6 +1052,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -1042,6 +1075,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -23,6 +23,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -32,6 +33,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on ; GFX9: 
liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -63,6 +65,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -70,6 +73,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -96,6 +100,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -103,6 +108,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -131,6 +137,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -168,6 +176,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -176,6 +185,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -238,6 +250,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; ; VI-LABEL: name: test_fminnum_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -247,6 +260,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] 
; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; ; GFX9-LABEL: name: test_fminnum_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -281,6 +295,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fminnum_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -293,6 +308,7 @@ ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fminnum_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -335,6 +351,7 @@ ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fminnum_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -350,6 +367,7 @@ ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fminnum_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -405,6 +423,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fminnum_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -431,6 +450,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -499,13 +519,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fminnum_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -551,13 +571,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -667,6 +687,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fminnum_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -715,6 +736,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -758,6 +780,7 @@ ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -771,6 +794,7 @@ ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -812,6 +836,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -823,6 +848,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -864,6 +890,7 @@ ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -877,6 +904,7 @@ ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = 
G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -918,6 +946,7 @@ ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -929,6 +958,7 @@ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -965,6 +995,7 @@ ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -973,6 +1004,7 @@ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1020,6 +1052,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -1042,6 +1075,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; ; VI-LABEL: name: test_fmul_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FMUL]](s32) + ; ; GFX9PLUS-LABEL: name: test_fmul_s32 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -50,6 +52,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMUL]](s64) + ; ; VI-LABEL: name: test_fmul_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -57,6 +60,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY 
[[FMUL]](s64) + ; ; GFX9PLUS-LABEL: name: test_fmul_s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -89,6 +93,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fmul_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -99,6 +104,7 @@ ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9PLUS-LABEL: name: test_fmul_s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -136,6 +142,7 @@ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmul_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -147,6 +154,7 @@ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -181,6 +189,7 @@ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fmul_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -192,6 +201,7 @@ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s32_flags ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} @@ -227,6 +237,7 @@ ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fmul_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -239,6 +250,7 @@ ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v3s32 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -274,6 +286,7 @@ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fmul_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -285,6 +298,7 @@ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMUL]](s64), [[FMUL1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s64 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, 
$vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9PLUS-NEXT: {{ $}} @@ -336,6 +350,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fmul_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -358,6 +373,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v2s16 ; GFX9PLUS: liveins: $vgpr0, $vgpr1 ; GFX9PLUS-NEXT: {{ $}} @@ -425,13 +441,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fmul_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -471,13 +487,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v3s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: {{ $}} @@ -583,6 +599,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fmul_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -623,6 +640,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9PLUS-LABEL: name: test_fmul_v4s16 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9PLUS-NEXT: {{ $}} diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -17,12 +17,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; VI-LABEL: name: test_fneg_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; GFX9-LABEL: name: test_fneg_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -45,12 +47,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; ; VI-LABEL: name: test_fneg_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + ; ; GFX9-LABEL: name: test_fneg_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -75,6 +79,7 @@ ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fneg_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -83,6 +88,7 @@ ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fneg_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -113,6 +119,7 @@ ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fneg_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -122,6 +129,7 @@ ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +160,7 @@ ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fneg_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -162,6 +171,7 @@ ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -192,6 +202,7 @@ ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fneg_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -201,6 +212,7 @@ ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FNEG]](s64), [[FNEG1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fneg_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -227,12 +239,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; VI-LABEL: name: test_fneg_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fneg_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -266,10 +280,10 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -287,10 +301,10 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -306,9 +320,8 @@ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 @@ -331,6 +344,7 @@ ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fneg_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -340,6 +354,7 
@@ ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fneg_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -256,22 +256,19 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -441,15 +438,12 @@ ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; 
SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; @@ -464,20 +458,19 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -494,20 +487,19 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[C3]](s16) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[AND5]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -539,33 +531,31 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -582,33 +572,31 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) 
= G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -625,33 +613,31 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), 
[[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) @@ -709,36 +695,32 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR5]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: 
[[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -752,14 +734,13 @@ ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) @@ -833,10 +814,9 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]] ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) @@ -957,50 +937,44 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; SI-NEXT: 
[[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY4]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY5]](s32) + ; 
SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -215,22 +215,19 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) @@ -249,34 +246,32 @@ ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; 
SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) @@ -472,14 +467,11 @@ ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; @@ -494,19 +486,18 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; 
VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND3]](s16) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -523,19 +514,18 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C2]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND2]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND3]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND2]](s16) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[AND3]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -567,30 +557,28 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL 
[[AMDGPU_RCP_IFLAG]], [[C3]] ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -609,30 +597,28 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; VI-NEXT: 
[[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] @@ -651,30 +637,28 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C5]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = 
G_AND [[SELECT1]], [[C2]] @@ -729,22 +713,19 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) @@ -761,48 +742,45 @@ ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: 
[[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY12]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY12]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY13]](s32) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT9]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) @@ -815,19 +793,18 @@ ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]] - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT11]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[ZEXT11]](s32) ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -841,14 +818,13 @@ ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]] ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]] ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C5]] - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) @@ -957,10 +933,9 @@ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]] ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C4]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL13]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) @@ -1070,22 +1045,19 @@ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = 
G_LSHR [[LSHR2]], [[ZEXT1]](s32) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) @@ -1104,34 +1076,32 @@ ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT 
[[AND8]](s16) ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) @@ -1143,32 +1113,29 @@ ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY10]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LSHR14]], [[ZEXT11]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], 
[[C5]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR16]], [[ZEXT13]](s32) ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) @@ -1187,34 +1154,32 @@ ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) - ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY16]](s32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LSHR20]], [[ZEXT15]](s32) ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) - ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR22]], [[ZEXT17]](s32) ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR23]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; ; VI-LABEL: name: test_fsub_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FSUB]](s32) + ; ; GFX9-LABEL: name: test_fsub_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fsub_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fsub_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -87,6 +91,7 @@ ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fsub_s64_fmf ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -95,6 +100,7 @@ ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fsub_s64_fmf ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -129,6 +135,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fsub_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -139,6 +146,7 @@ ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSUB]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fsub_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -176,6 +184,7 @@ ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fsub_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -187,6 +196,7 @@ ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -221,6 +231,7 @@ ; SI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: 
test_fsub_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -232,6 +243,7 @@ ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -267,6 +279,7 @@ ; SI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fsub_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -279,6 +292,7 @@ ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSUB]](s32), [[FSUB1]](s32), [[FSUB2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fsub_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -316,6 +330,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fsub_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -329,6 +344,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fsub_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -383,6 +399,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fsub_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -406,6 +423,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -476,13 +494,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fsub_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ 
$}} @@ -522,13 +540,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -638,6 +656,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fsub_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -678,6 +697,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -18,6 +18,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -26,6 +27,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -54,6 +56,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: test_icmp_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -62,6 +65,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_icmp_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -95,6 +99,7 @@ ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s16 ; GFX8: liveins: 
$vgpr0 ; GFX8-NEXT: {{ $}} @@ -105,6 +110,7 @@ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[TRUNC]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -135,37 +141,36 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX7-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[TRUNC]] @@ -191,31 +196,30 @@ ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: test_icmp_s24 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: test_icmp_s24 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[AND]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[AND]] ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) %0:_(s24) = G_CONSTANT i24 0 @@ -247,6 +251,7 @@ ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -262,6 +267,7 @@ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -270,8 +276,8 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] @@ -310,6 +316,7 @@ ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -329,6 +336,7 @@ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -339,8 +347,8 @@ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) 
= G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] @@ -384,6 +392,7 @@ ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -407,6 +416,7 @@ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -419,8 +429,8 @@ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] @@ -452,6 +462,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p0 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -460,6 +471,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -488,6 +500,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -496,6 +509,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p1), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -525,6 +539,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p2 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -533,6 +548,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p2), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -562,6 +578,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p3 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -570,6 
+587,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -598,6 +616,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -606,6 +625,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p4), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -635,6 +655,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p5 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -643,6 +664,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -671,6 +693,7 @@ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] ; GFX7-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX7-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX8-LABEL: name: test_icmp_p999 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -679,6 +702,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p999), [[COPY1]] ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; ; GFX9-LABEL: name: test_icmp_p999 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -714,6 +738,7 @@ ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -729,6 +754,7 @@ ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -771,6 +797,7 @@ ; GFX7-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2p999 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -786,6 +813,7 @@ ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2p999 ; GFX9: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -829,15 +857,14 @@ ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]] - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]] + ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR]](s32), [[LSHR1]] ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) ; GFX7-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -862,6 +889,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -904,25 +932,21 @@ ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX7-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; ; GFX8-LABEL: name: test_icmp_s33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) + ; ; GFX9-LABEL: name: test_icmp_s33 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[C]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[ICMP]](s1) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -441,14 +441,12 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -490,19 +488,16 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -890,8 +890,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -977,14 +976,12 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1023,8 +1020,7 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) @@ -1057,8 +1053,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1094,14 +1089,12 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1139,10 +1132,8 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1175,14 +1166,12 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1219,10 +1208,8 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1251,8 +1238,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1308,8 +1294,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1365,8 +1350,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1400,10 +1384,8 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1429,8 +1411,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) @@ -1459,8 +1440,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) @@ -1495,10 +1475,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1524,8 +1503,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[UV1]](<2 x s16>) @@ -1558,10 +1536,8 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) @@ -1586,8 +1562,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -28,6 +28,7 @@ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX8-LABEL: name: test_intrinsic_round_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -45,6 +46,7 @@ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_intrinsic_round_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -90,6 +92,7 @@ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX8-LABEL: name: test_intrinsic_round_s32_flags ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -107,6 +110,7 @@ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_intrinsic_round_s32_flags ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -172,6 +176,7 @@ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; ; GFX8-LABEL: name: test_intrinsic_round_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -190,6 +195,7 @@ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; ; GFX9-LABEL: name: test_intrinsic_round_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -246,6 +252,7 @@ ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_intrinsic_round_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -273,6 +280,7 @@ ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_intrinsic_round_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -370,6 +378,7 @@ ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_intrinsic_round_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -399,6 +408,7 @@ ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_intrinsic_round_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -469,6 +479,7 @@ ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: test_intrinsic_round_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -488,6 +499,7 @@ ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_intrinsic_round_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -577,6 +589,7 @@ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX8-LABEL: name: test_intrinsic_round_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -612,6 +625,7 @@ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_intrinsic_round_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -742,13 +756,13 @@ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: test_intrinsic_round_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -805,13 +819,13 @@ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR5]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_intrinsic_round_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -985,6 +999,7 @@ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: test_intrinsic_round_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -1047,6 +1062,7 @@ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_intrinsic_round_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -235,14 +235,13 @@ ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -165,30 +165,24 @@ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C2]](s32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]] ; GCN-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
- ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
+ ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
+ ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
 ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GCN-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]]
- ; GCN-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]]
- ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
- ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C2]](s32)
+ ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]]
 ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
- ; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
- ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
- ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
+ ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
+ ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
+ ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
+ ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
 ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; GCN-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]]
- ; GCN-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]]
- ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
- ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
+ ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR8]], [[C2]](s32)
+ ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]]
 ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -19,6 +19,7 @@
 ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s1_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -27,6 +28,7 @@
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s1_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -55,6 +57,7 @@
 ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s2_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -63,6 +66,7 @@
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s2_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -89,12 +93,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -119,12 +125,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -149,12 +157,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -179,12 +189,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -215,6 +227,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -227,6 +240,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -257,12 +271,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -292,6 +308,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s32_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -304,6 +321,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s32_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -349,6 +367,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s32_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -371,6 +390,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s32_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -410,12 +430,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s24_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s24_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -440,12 +462,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s24_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s24_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -476,6 +500,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s24_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -488,6 +513,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s24_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -530,6 +556,7 @@
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_s24_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -548,6 +575,7 @@
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s24_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -586,6 +614,7 @@
 ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s48_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -594,6 +623,7 @@
 ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s48_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -620,12 +650,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -649,12 +681,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -696,6 +730,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -720,6 +755,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -797,6 +833,7 @@
 ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; VI-LABEL: name: test_load_constant_s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -839,6 +876,7 @@
 ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -899,6 +937,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -906,6 +945,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -931,6 +971,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -938,6 +979,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 8, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -963,6 +1005,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -970,6 +1013,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1016,6 +1060,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1044,6 +1089,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1137,6 +1183,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_constant_s96_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1191,6 +1238,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s96_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1268,6 +1316,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
 ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
+ ;
 ; VI-LABEL: name: test_load_constant_s160_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1280,6 +1329,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>)
 ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s160_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1318,6 +1368,7 @@
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
 ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+ ;
 ; VI-LABEL: name: test_load_constant_s224_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1333,6 +1384,7 @@
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
 ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s224_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1369,6 +1421,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1376,6 +1429,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1401,6 +1455,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1408,6 +1463,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1495,6 +1551,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_constant_s128_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1564,6 +1621,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s128_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1651,6 +1709,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+ ;
 ; VI-LABEL: name: test_load_constant_s256_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1658,6 +1717,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), align 16, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256)
+ ;
 ; GFX9-LABEL: name: test_load_constant_s256_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1682,12 +1742,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1711,12 +1773,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1777,6 +1841,7 @@
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
+ ;
 ; VI-LABEL: name: test_load_constant_p1_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -1820,6 +1885,7 @@
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p1_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1880,12 +1946,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+ ;
 ; VI-LABEL: name: test_load_constant_p3_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p3_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1909,12 +1977,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1938,12 +2008,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -1986,6 +2058,7 @@
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2011,6 +2084,7 @@
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2090,6 +2164,7 @@
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; VI-LABEL: name: test_load_constant_p4_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2133,6 +2208,7 @@
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p4_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2193,12 +2269,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2229,6 +2307,7 @@
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2242,6 +2321,7 @@
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2289,6 +2369,7 @@
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; VI-LABEL: name: test_load_constant_p5_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2312,6 +2393,7 @@
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5)
+ ;
 ; GFX9-LABEL: name: test_load_constant_p5_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2352,12 +2434,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2383,12 +2467,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2420,6 +2506,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2432,6 +2519,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2467,8 +2555,8 @@
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -2488,6 +2576,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2498,8 +2587,8 @@
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -2517,6 +2606,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2527,8 +2617,8 @@
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -2579,8 +2669,8 @@
 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
 ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -2600,6 +2690,7 @@
 ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
 ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
 ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2620,8 +2711,8 @@
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -2639,6 +2730,7 @@
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
 ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2659,8 +2751,8 @@
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -2697,12 +2789,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2733,6 +2827,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2745,6 +2840,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2791,6 +2887,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s8_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2813,6 +2910,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s8_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2853,12 +2951,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s8_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s8_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -2908,6 +3008,7 @@
 ; CI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s8_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -2939,6 +3040,7 @@
 ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s8_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3056,6 +3158,7 @@
 ; CI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+ ;
 ; VI-LABEL: name: test_load_constant_v32s8_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3107,6 +3210,7 @@
 ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR7]](<4 x s32>)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v32s8_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3224,12 +3328,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3263,6 +3369,7 @@
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3279,6 +3386,7 @@
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3328,6 +3436,7 @@
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3354,6 +3463,7 @@
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3406,13 +3516,13 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3432,13 +3542,13 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3501,13 +3611,13 @@
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3536,13 +3646,13 @@
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3612,13 +3722,13 @@
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3647,13 +3757,13 @@
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3737,13 +3847,13 @@
 ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
 ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
 ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
 ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3786,13 +3896,13 @@
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
 ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3854,12 +3964,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3883,12 +3995,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -3934,6 +4048,7 @@
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -3962,6 +4077,7 @@
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4041,6 +4157,7 @@
 ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s16_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4087,6 +4204,7 @@
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s16_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4145,6 +4263,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s16_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4152,6 +4271,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s16_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4176,12 +4296,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4205,12 +4327,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4248,6 +4372,7 @@
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4268,6 +4393,7 @@
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4337,6 +4463,7 @@
 ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s32_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4375,6 +4502,7 @@
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s32_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4430,12 +4558,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s32_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s32_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4461,12 +4591,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4490,12 +4622,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4519,12 +4653,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4548,12 +4684,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s32_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s32_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4577,12 +4715,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v8s32_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v8s32_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4606,12 +4746,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s32_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s32_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4635,12 +4777,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4664,12 +4808,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4693,12 +4839,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4722,12 +4870,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align4
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4788,6 +4938,7 @@
 ; CI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align2
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -4831,6 +4982,7 @@
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align2
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -4962,6 +5114,7 @@
 ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5039,6 +5192,7 @@
 ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5137,6 +5291,7 @@
 ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5147,6 +5302,7 @@
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5183,6 +5339,7 @@
 ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5196,6 +5353,7 @@
 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5335,6 +5493,7 @@
 ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v3s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5448,6 +5607,7 @@
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v3s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5580,12 +5740,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5609,12 +5771,14 @@
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align8
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align8
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -5777,6 +5941,7 @@
 ; CI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]]
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_load_constant_v4s64_align1
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -5922,6 +6087,7 @@
 ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]]
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v4s64_align1
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6085,6 +6251,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2s128_align32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -6092,6 +6259,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2s128_align32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -6117,6 +6285,7 @@
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; VI-LABEL: name: test_load_constant_v2p1_align16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -6124,6 +6293,7 @@
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>)
+ ;
 ; GFX9-LABEL: name: test_load_constant_v2p1_align16
 ; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}} @@ -6149,6 +6319,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_constant_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6156,6 +6327,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_constant_v2p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6181,6 +6353,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_constant_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6188,6 +6361,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_constant_v2p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6275,6 +6449,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_constant_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6344,6 +6519,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_constant_v2p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6430,12 +6606,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6459,12 +6637,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; 
GFX9-NEXT: {{ $}} @@ -6522,6 +6702,7 @@ ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_constant_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6562,6 +6743,7 @@ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_constant_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6619,12 +6801,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6648,12 +6832,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6679,6 +6865,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6686,6 +6873,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6711,6 +6899,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6718,6 +6907,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6743,6 +6933,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: 
test_ext_load_constant_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6750,6 +6941,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6778,6 +6970,7 @@ ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6788,6 +6981,7 @@ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6816,6 +7010,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6823,6 +7018,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6848,6 +7044,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6855,6 +7052,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6911,6 +7109,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6949,6 +7148,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7018,6 +7218,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7038,6 +7239,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7075,12 +7277,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7104,12 +7308,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7133,12 +7339,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7258,6 +7466,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7360,6 +7569,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7528,6 +7738,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7580,6 +7791,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7660,6 +7872,7 @@ ; CI-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7674,6 +7887,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7716,6 +7930,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7730,6 +7945,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7765,6 +7981,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; ; VI-LABEL: name: test_load_constant_s512_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7772,6 +7989,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) + ; ; GFX9-LABEL: name: test_load_constant_s512_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7797,6 +8015,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; ; VI-LABEL: name: test_load_constant_v4s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7804,6 +8023,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) + ; ; GFX9-LABEL: name: test_load_constant_v4s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -19,6 +19,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_flat_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -27,6 +28,7 @@ ; VI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_flat_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s2_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -89,12 +93,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -119,12 +125,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -149,12 +157,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -179,12 +189,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -215,6 +227,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -227,6 +240,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -257,12 +271,14 
@@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -292,6 +308,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -304,6 +321,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -349,6 +367,7 @@ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -371,6 +390,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -416,17 +436,17 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND3]](s64) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -439,17 +459,17 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND3]](s64) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C4]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND2]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -480,6 +500,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -490,6 +511,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -517,6 +539,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -527,6 +550,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -564,6 +588,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -584,6 +609,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -657,6 +683,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_flat_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -695,6 +722,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_flat_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -762,6 +790,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} 
@@ -776,6 +805,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -808,6 +838,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -822,6 +853,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -854,6 +886,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -868,6 +901,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -914,6 +948,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -942,6 +977,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1035,6 +1071,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_flat_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1089,6 +1126,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_flat_s96_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1174,6 +1212,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; VI-LABEL: name: 
test_load_flat_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1194,6 +1233,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; GFX9-LABEL: name: test_load_flat_s160_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1245,6 +1285,7 @@ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1273,6 +1314,7 @@ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1319,6 +1361,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1336,6 +1379,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1371,6 +1415,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1388,6 +1433,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1475,6 +1521,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_flat_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1544,6 +1591,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_flat_s128_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1653,6 +1701,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; VI-LABEL: name: test_load_flat_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1682,6 +1731,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; GFX9-LABEL: name: test_load_flat_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1714,6 +1764,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1724,6 +1775,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1751,6 +1803,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1761,6 +1814,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1816,6 +1870,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_flat_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1854,6 +1909,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_flat_p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1914,12 +1970,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_flat_p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_flat_p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1947,6 +2005,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), 
[[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1957,6 +2016,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1984,6 +2044,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1994,6 +2055,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2031,6 +2093,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2051,6 +2114,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2125,6 +2189,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; VI-LABEL: name: test_load_flat_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2163,6 +2228,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; ; GFX9-LABEL: name: test_load_flat_p4_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2223,12 +2289,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2259,6 +2327,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2272,6 +2341,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2319,6 +2389,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], 
[[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_flat_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2342,6 +2413,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_flat_p5_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2382,12 +2454,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2413,12 +2487,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2450,6 +2526,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2462,6 +2539,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v2s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2497,8 +2575,8 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -2518,6 +2596,7 @@ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2528,8 +2607,8 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -2547,6 
+2626,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v3s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2557,8 +2637,8 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -2609,8 +2689,8 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -2630,6 +2710,7 @@ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; VI-LABEL: name: test_load_flat_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2650,8 +2731,8 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -2669,6 +2750,7 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v3s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2689,8 +2771,8 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -2727,12 +2809,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; 
GFX9-LABEL: name: test_load_flat_v4s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2763,6 +2847,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2775,6 +2860,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v4s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2821,6 +2907,7 @@ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_flat_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2843,6 +2930,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_flat_v4s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2887,6 +2975,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2897,6 +2986,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v8s8_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2931,6 +3021,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2947,6 +3038,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v16s8_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2993,6 +3085,7 @@ ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3021,6 +3114,7 @@ ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v32s8_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3050,12 +3144,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3089,6 +3185,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3105,6 +3202,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3154,6 +3252,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_flat_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3180,6 +3279,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v2s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3231,22 +3331,21 @@ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3265,22 +3364,21 @@ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3333,22 +3431,21 @@ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3367,22 +3464,21 @@ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3452,13 +3548,13 @@ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3487,13 +3583,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3577,13 +3673,13 @@ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_flat_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3626,13 +3722,13 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v3s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3698,6 +3794,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 
x s16>) from unknown-address + 4) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3708,6 +3805,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3735,6 +3833,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3745,6 +3844,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s16>) from unknown-address + 4) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3789,6 +3889,7 @@ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3816,6 +3917,7 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -3894,6 +3996,7 @@ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_flat_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3939,6 +4042,7 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v4s16_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4007,6 +4111,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4024,6 +4129,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), 
[[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4052,6 +4158,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4062,6 +4169,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4089,6 +4197,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4099,6 +4208,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4127,6 +4237,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_flat_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4137,6 +4248,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v2s32_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4167,6 +4279,7 @@ ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_flat_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4180,6 +4293,7 @@ ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8, align 8) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v3s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4212,6 +4326,7 @@ ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_flat_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4225,6 +4340,7 @@ ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from unknown-address + 8) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v3s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4258,6 +4374,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4274,6 +4391,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4307,6 +4425,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4323,6 +4442,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4356,6 +4476,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_flat_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4372,6 +4493,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from unknown-address + 12) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v4s32_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4417,6 +4539,7 @@ ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[BUILD_VECTOR]](<8 x s32>) + ; ; VI-LABEL: name: test_load_flat_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4445,6 +4568,7 @@ ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s32) from unknown-address + 28) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4518,6 +4642,7 @@ ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; VI-LABEL: name: test_load_flat_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4570,6 +4695,7 @@ ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s32) from unknown-address + 60) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; GFX9-LABEL: name: test_load_flat_v16s32_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4614,6 +4740,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4631,6 +4758,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4665,6 +4793,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4682,6 +4811,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4716,6 +4846,7 @@ ; CI-NEXT: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4733,6 +4864,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4785,6 +4917,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4820,6 +4953,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -4943,6 +5077,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_flat_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5012,6 +5147,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v2s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5125,6 +5261,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5150,6 +5287,7 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5201,6 +5339,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: liveins: 
$vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5226,6 +5365,7 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5353,6 +5493,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5454,6 +5595,7 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5609,6 +5751,7 @@ ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5638,6 +5781,7 @@ ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5688,6 +5832,7 @@ ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5717,6 +5862,7 @@ ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -5867,6 +6013,7 @@ ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_flat_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ 
$}} @@ -5996,6 +6143,7 @@ ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR20]](s32), [[OR23]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_flat_v4s64_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6182,6 +6330,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; VI-LABEL: name: test_load_flat_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6211,6 +6360,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-LABEL: name: test_load_flat_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6250,6 +6400,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6267,6 +6418,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6302,6 +6454,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6319,6 +6472,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6354,6 +6508,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6371,6 +6526,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6458,6 +6614,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_flat_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6527,6 +6684,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_flat_v2p1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6617,6 +6775,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6627,6 +6786,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6654,6 +6814,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6664,6 +6825,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p0) :: (load (p3) from unknown-address + 4) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6721,6 +6883,7 @@ ; CI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_flat_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6761,6 +6924,7 @@ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_flat_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6818,12 +6982,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; 
VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6847,12 +7013,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6878,6 +7046,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6885,6 +7054,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6910,6 +7080,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6917,6 +7088,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6942,6 +7114,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6949,6 +7122,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -6977,6 +7151,7 @@ ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6987,6 +7162,7 @@ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7015,6 +7191,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = 
COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7022,6 +7199,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7047,6 +7225,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7054,6 +7233,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -7079,12 +7259,14 @@ ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_flat_s32_align536870912 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_flat_s32_align536870912 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -39,6 +39,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -47,6 +48,7 @@ ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -63,6 +66,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -71,6 +75,7 @@ ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -99,6 +104,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; 
CI-HSA-LABEL: name: test_load_global_s2_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -107,6 +113,7 @@ ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s2_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -115,6 +122,7 @@ ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-MESA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_global_s2_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -123,6 +131,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s2_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -131,6 +140,7 @@ ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s2_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -157,30 +167,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -205,30 +220,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; 
CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -253,30 +273,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -301,30 +326,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; 
GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -355,12 +385,14 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -373,6 +405,7 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -385,12 +418,14 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -421,30 +456,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -474,12 +514,14 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s32_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; 
CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s32_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -492,6 +534,7 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_global_s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -504,12 +547,14 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -555,12 +600,14 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s32_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s32_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -583,6 +630,7 @@ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_global_s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -605,12 +653,14 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s32_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s32_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -650,30 +700,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s24_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s24_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; 
CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s24_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -698,30 +753,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s24_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s24_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -752,6 +812,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s24_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -764,6 +825,7 @@ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s24_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -776,6 +838,7 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -788,6 +851,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -800,6 +864,7 @@ ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; 
GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -842,6 +907,7 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_s24_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -854,6 +920,7 @@ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_s24_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -872,6 +939,7 @@ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_load_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -890,6 +958,7 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_s24_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -902,6 +971,7 @@ ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_s24_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -940,6 +1010,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -948,6 +1019,7 @@ ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -956,6 +1028,7 @@ ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; VI-LABEL: name: test_load_global_s48_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -964,6 +1037,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -972,6 +1046,7 @@ ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LOAD]], [[C]] ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; ; GFX9-MESA-LABEL: name: test_load_global_s48_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -998,30 +1073,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-HSA-LABEL: name: test_load_global_s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; 
CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-MESA-LABEL: name: test_load_global_s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_global_s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1045,30 +1125,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-HSA-LABEL: name: test_load_global_s64_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-MESA-LABEL: name: test_load_global_s64_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; VI-LABEL: name: test_load_global_s64_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1110,12 +1195,14 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; CI-HSA-LABEL: name: test_load_global_s64_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-MESA-LABEL: name: test_load_global_s64_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1140,6 +1227,7 @@ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; 
CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; VI-LABEL: name: test_load_global_s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1164,12 +1252,14 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1247,12 +1337,14 @@ ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; CI-HSA-LABEL: name: test_load_global_s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; CI-MESA-LABEL: name: test_load_global_s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1295,6 +1387,7 @@ ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; VI-LABEL: name: test_load_global_s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1337,12 +1430,14 @@ ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; ; GFX9-HSA-LABEL: name: test_load_global_s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; ; GFX9-MESA-LABEL: name: test_load_global_s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1405,6 +1500,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1412,6 +1508,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-MESA-LABEL: name: test_load_global_s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1419,6 +1516,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_global_s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1426,6 +1524,7 @@ ; VI-NEXT: 
[[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -1433,6 +1532,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1463,6 +1563,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-HSA-LABEL: name: test_load_global_s96_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1470,6 +1571,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-MESA-LABEL: name: test_load_global_s96_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1477,6 +1579,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_global_s96_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1484,6 +1587,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -1491,6 +1595,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1521,6 +1626,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-HSA-LABEL: name: test_load_global_s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1528,6 +1634,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-MESA-LABEL: name: test_load_global_s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1535,6 +1642,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; 
CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_global_s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1542,6 +1650,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -1549,6 +1658,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1595,6 +1705,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-HSA-LABEL: name: test_load_global_s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1602,6 +1713,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-MESA-LABEL: name: test_load_global_s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1630,6 +1742,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_global_s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1658,6 +1771,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -1665,6 +1779,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1758,6 +1873,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-HSA-LABEL: name: test_load_global_s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1765,6 +1881,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-MESA-LABEL: name: test_load_global_s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1819,6 +1936,7 @@ ; 
CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_global_s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1873,6 +1991,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-HSA-LABEL: name: test_load_global_s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -1880,6 +1999,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-MESA-LABEL: name: test_load_global_s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -1957,6 +2077,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; SI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; CI-HSA-LABEL: name: test_load_global_s160_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -1969,6 +2090,7 @@ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; CI-MESA-LABEL: name: test_load_global_s160_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -1981,6 +2103,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-MESA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; VI-LABEL: name: test_load_global_s160_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1993,6 +2116,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; GFX9-HSA-LABEL: name: test_load_global_s160_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2005,6 +2129,7 @@ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; GFX9-HSA-NEXT: S_NOP 0, implicit [[BITCAST]](s160) + ; ; GFX9-MESA-LABEL: name: test_load_global_s160_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2046,6 +2171,7 @@ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; CI-HSA-LABEL: name: test_load_global_s224_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -2061,6 +2187,7 @@ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; 
CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; CI-MESA-LABEL: name: test_load_global_s224_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2076,6 +2203,7 @@ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; VI-LABEL: name: test_load_global_s224_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2091,6 +2219,7 @@ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; GFX9-HSA-LABEL: name: test_load_global_s224_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2106,6 +2235,7 @@ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) + ; ; GFX9-MESA-LABEL: name: test_load_global_s224_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2142,6 +2272,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-HSA-LABEL: name: test_load_global_s128_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -2149,6 +2280,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-MESA-LABEL: name: test_load_global_s128_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2156,6 +2288,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_global_s128_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2163,6 +2296,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2170,6 +2304,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2195,6 +2330,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-HSA-LABEL: name: test_load_global_s128_align4 ; CI-HSA: liveins: 
$vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -2202,6 +2338,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-MESA-LABEL: name: test_load_global_s128_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2209,6 +2346,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_global_s128_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2216,6 +2354,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2223,6 +2362,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2310,6 +2450,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-HSA-LABEL: name: test_load_global_s128_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -2317,6 +2458,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-MESA-LABEL: name: test_load_global_s128_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2386,6 +2528,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_global_s128_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2455,6 +2598,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-HSA-LABEL: name: test_load_global_s128_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2462,6 +2606,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-MESA-LABEL: name: test_load_global_s128_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2549,6 +2694,7 
@@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; CI-HSA-LABEL: name: test_load_global_s256_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -2556,6 +2702,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; CI-MESA-LABEL: name: test_load_global_s256_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2563,6 +2710,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; VI-LABEL: name: test_load_global_s256_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2570,6 +2718,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; GFX9-HSA-LABEL: name: test_load_global_s256_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -2577,6 +2726,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) + ; ; GFX9-MESA-LABEL: name: test_load_global_s256_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2601,30 +2751,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-HSA-LABEL: name: test_load_global_p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-MESA-LABEL: name: test_load_global_p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_global_p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-HSA-LABEL: name: test_load_global_p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-MESA-LABEL: name: test_load_global_p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2648,30 +2803,35 
@@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-HSA-LABEL: name: test_load_global_p1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-MESA-LABEL: name: test_load_global_p1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_global_p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-HSA-LABEL: name: test_load_global_p1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-MESA-LABEL: name: test_load_global_p1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2732,12 +2892,14 @@ ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-HSA-LABEL: name: test_load_global_p1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-MESA-LABEL: name: test_load_global_p1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -2781,6 +2943,7 @@ ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; VI-LABEL: name: test_load_global_p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2824,12 +2987,14 @@ ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-HSA-LABEL: name: test_load_global_p1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-MESA-LABEL: name: test_load_global_p1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2890,30 +3055,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-HSA-LABEL: name: test_load_global_p3_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; 
CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-MESA-LABEL: name: test_load_global_p3_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_global_p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-HSA-LABEL: name: test_load_global_p3_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-MESA-LABEL: name: test_load_global_p3_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2937,30 +3107,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-HSA-LABEL: name: test_load_global_p4_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-MESA-LABEL: name: test_load_global_p4_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; VI-LABEL: name: test_load_global_p4_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; GFX9-MESA-LABEL: name: test_load_global_p4_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -2984,30 +3159,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-HSA-LABEL: name: test_load_global_p4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-MESA-LABEL: name: test_load_global_p4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; VI-LABEL: name: test_load_global_p4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; 
VI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; GFX9-MESA-LABEL: name: test_load_global_p4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3050,12 +3230,14 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; CI-HSA-LABEL: name: test_load_global_p4_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-MESA-LABEL: name: test_load_global_p4_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3081,6 +3263,7 @@ ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; VI-LABEL: name: test_load_global_p4_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3106,12 +3289,14 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; GFX9-MESA-LABEL: name: test_load_global_p4_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3191,12 +3376,14 @@ ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; CI-HSA-LABEL: name: test_load_global_p4_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; ; CI-MESA-LABEL: name: test_load_global_p4_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3240,6 +3427,7 @@ ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; VI-LABEL: name: test_load_global_p4_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3283,12 +3471,14 @@ ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) + ; ; GFX9-HSA-LABEL: name: test_load_global_p4_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY 
[[LOAD]](p4) + ; ; GFX9-MESA-LABEL: name: test_load_global_p4_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3349,30 +3539,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-HSA-LABEL: name: test_load_global_p5_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-MESA-LABEL: name: test_load_global_p5_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_global_p5_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-HSA-LABEL: name: test_load_global_p5_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-MESA-LABEL: name: test_load_global_p5_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3403,12 +3598,14 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-HSA-LABEL: name: test_load_global_p5_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-MESA-LABEL: name: test_load_global_p5_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3422,6 +3619,7 @@ ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_global_p5_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3435,12 +3633,14 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-HSA-LABEL: name: test_load_global_p5_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-MESA-LABEL: name: test_load_global_p5_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3488,12 +3688,14 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-HSA-LABEL: name: test_load_global_p5_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD 
[[COPY]](p1) :: (load (p5), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-MESA-LABEL: name: test_load_global_p5_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3517,6 +3719,7 @@ ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-MESA-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_global_p5_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3540,12 +3743,14 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-HSA-LABEL: name: test_load_global_p5_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-MESA-LABEL: name: test_load_global_p5_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3586,30 +3791,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3635,30 +3845,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: liveins: 
$vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3690,12 +3905,14 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3708,6 +3925,7 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3720,12 +3938,14 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3761,8 +3981,8 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -3782,6 +4002,7 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_v3s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -3792,8 +4013,8 @@ ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 255 @@ -3813,6 +4034,7 @@ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_v3s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3823,8 +4045,8 @@ ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -3844,6 +4066,7 @@ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3854,8 +4077,8 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -3873,6 +4096,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -3883,8 +4107,8 @@ ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -3902,6 +4126,7 @@ ; GFX9-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3912,8 +4137,8 @@ ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-MESA-NEXT: 
[[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -3965,8 +4190,8 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -3986,6 +4211,7 @@ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; CI-HSA-LABEL: name: test_load_global_v3s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -4001,8 +4227,8 @@ ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4022,6 +4248,7 @@ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; ; CI-MESA-LABEL: name: test_load_global_v3s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4042,8 +4269,8 @@ ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4063,6 +4290,7 @@ ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; VI-LABEL: name: test_load_global_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4083,8 +4311,8 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -4102,6 +4330,7 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4117,8 +4346,8 @@ ; 
GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX9-HSA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -4136,6 +4365,7 @@ ; GFX9-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4156,8 +4386,8 @@ ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -4202,6 +4432,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -4216,6 +4447,7 @@ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4230,6 +4462,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; VI-LABEL: name: test_load_global_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4244,6 +4477,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4264,6 +4498,7 @@ ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4314,6 +4549,7 @@ ; SI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -4328,6 +4564,7 @@ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4347,6 +4584,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; VI-LABEL: name: test_load_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4366,6 +4604,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4386,6 +4625,7 @@ ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4450,6 +4690,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -4464,6 +4705,7 @@ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4492,6 +4734,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; VI-LABEL: name: test_load_global_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4520,6 +4763,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -4540,6 +4784,7 @@ ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) 
= G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC4]](<4 x s8>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4591,30 +4836,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4639,30 +4889,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) 
+ ; ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4687,30 +4942,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4736,30 +4996,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} 
@@ -4793,12 +5058,14 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4815,6 +5082,7 @@ ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4831,12 +5099,14 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -4886,12 +5156,14 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -4918,6 +5190,7 @@ ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -4944,12 +5217,14 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5002,13 +5277,13 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] 
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5028,13 +5303,13 @@ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5054,13 +5329,13 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5080,13 +5355,13 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[LSHR]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5108,6 +5383,7 @@ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5170,13 +5446,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5205,13 +5481,13 @@ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5240,13 +5516,13 @@ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR 
[[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5275,13 +5551,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5310,6 +5586,7 @@ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5379,13 +5656,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5414,13 +5691,13 @@ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5449,13 +5726,13 @@ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5484,13 +5761,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5519,6 +5796,7 @@ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5602,13 +5880,13 @@ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C4]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -5637,13 +5915,13 @@ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5686,13 +5964,13 @@ ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v3s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -5735,13 +6013,13 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -5770,6 +6048,7 @@ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5831,30 +6110,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5878,30 +6162,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; 
CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -5947,12 +6236,14 @@ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -5981,6 +6272,7 @@ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6009,12 +6301,14 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6094,12 +6388,14 @@ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: 
{{ $}} @@ -6146,6 +6442,7 @@ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_global_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6192,12 +6489,14 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6270,6 +6569,7 @@ ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6291,6 +6591,7 @@ ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6312,6 +6613,7 @@ ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6333,6 +6635,7 @@ ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6352,6 +6655,7 @@ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6410,6 +6714,7 @@ ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6450,6 +6755,7 @@ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6490,6 +6796,7 @@ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6530,6 +6837,7 @@ ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; 
GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6562,6 +6870,7 @@ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6633,6 +6942,7 @@ ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6673,6 +6983,7 @@ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6713,6 +7024,7 @@ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6753,6 +7065,7 @@ ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -6785,6 +7098,7 @@ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6874,6 +7188,7 @@ ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v5s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -6914,6 +7229,7 @@ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -6954,6 +7270,7 @@ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -6994,6 +7311,7 @@ ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7026,6 +7344,7 @@ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7137,6 +7456,7 @@ ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: 
test_load_global_v5s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7177,6 +7497,7 @@ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v5s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7239,6 +7560,7 @@ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v5s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7301,6 +7623,7 @@ ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7333,6 +7656,7 @@ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7413,6 +7737,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7420,6 +7745,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7427,6 +7753,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7434,6 +7761,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7441,6 +7769,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7471,6 +7800,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align8 ; CI-HSA: 
liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7478,6 +7808,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7485,6 +7816,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7492,6 +7824,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7499,6 +7832,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7529,6 +7863,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7536,6 +7871,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7543,6 +7879,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7550,6 +7887,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7557,6 +7895,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7603,6 +7942,7 @@ ; 
SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7610,6 +7950,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7638,6 +7979,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7666,6 +8008,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7673,6 +8016,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 2, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7766,6 +8110,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v6s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7773,6 +8118,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v6s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -7827,6 +8173,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; VI-LABEL: name: test_load_global_v6s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -7881,6 +8228,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -7888,6 +8236,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD 
[[COPY]](p1) :: (load (<3 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -7975,6 +8324,7 @@ ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -7997,6 +8347,7 @@ ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8019,6 +8370,7 @@ ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8041,6 +8393,7 @@ ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8062,6 +8415,7 @@ ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8153,6 +8507,7 @@ ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8205,6 +8560,7 @@ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8257,6 +8613,7 @@ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8309,6 +8666,7 @@ ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8351,6 +8709,7 @@ ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8463,6 +8822,7 @@ ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8515,6 +8875,7 @@ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; 
CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8567,6 +8928,7 @@ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8619,6 +8981,7 @@ ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8661,6 +9024,7 @@ ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8773,6 +9137,7 @@ ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -8825,6 +9190,7 @@ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -8877,6 +9243,7 @@ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -8929,6 +9296,7 @@ ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -8971,6 +9339,7 @@ ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9113,6 +9482,7 @@ ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9165,6 +9535,7 @@ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9247,6 +9618,7 @@ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9329,6 +9701,7 @@ ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-NEXT: 
$vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9371,6 +9744,7 @@ ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9468,6 +9842,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9475,6 +9850,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9482,6 +9858,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_global_v8s16_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9489,6 +9866,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9496,6 +9874,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9521,6 +9900,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -9528,6 +9908,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9535,6 +9916,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ 
-9542,6 +9924,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -9549,6 +9932,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9573,30 +9957,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9620,30 +10009,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD 
[[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9681,12 +10075,14 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9707,6 +10103,7 @@ ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9727,12 +10124,14 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9802,12 +10201,14 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s32_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s32_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -9846,6 +10247,7 @@ ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_global_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -9884,12 +10286,14 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = 
COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9947,30 +10351,35 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_load_global_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -9999,30 +10408,35 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_load_global_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, 
addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10046,30 +10460,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10093,30 +10512,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: 
test_load_global_v4s32_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10140,30 +10564,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s32_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s32_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_global_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10187,30 +10616,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v8s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v8s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; VI-LABEL: name: test_load_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-HSA-LABEL: 
name: test_load_global_v8s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10234,30 +10668,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; CI-HSA-LABEL: name: test_load_global_v16s32_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; CI-MESA-LABEL: name: test_load_global_v16s32_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; VI-LABEL: name: test_load_global_v16s32_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10281,30 +10720,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 
x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10328,30 +10772,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10375,30 +10824,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align4 ; VI: liveins: 
$vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10459,12 +10913,14 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10508,6 +10964,7 @@ ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_global_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10551,12 +11008,14 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10688,12 +11147,14 @@ ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10771,6 +11232,7 @@ ; CI-MESA-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: 
test_load_global_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10848,12 +11310,14 @@ ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -10949,6 +11413,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2sp1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -10956,6 +11421,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2sp1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -10963,6 +11429,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2sp1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -10970,6 +11437,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -10977,6 +11445,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11005,6 +11474,7 @@ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11015,6 +11485,7 @@ ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; CI-HSA-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11025,6 +11496,7 @@ ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11035,6 +11507,7 @@ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11045,6 +11518,7 @@ ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11081,6 +11555,7 @@ ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11094,6 +11569,7 @@ ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11107,6 +11583,7 @@ ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11120,6 +11597,7 @@ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11133,6 +11611,7 @@ ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11272,6 +11751,7 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -11285,6 +11765,7 @@ ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -11398,6 +11879,7 @@ ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -11511,6 +11993,7 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -11524,6 +12007,7 @@ ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11656,30 +12140,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-HSA-LABEL: name: 
test_load_global_v4s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11703,30 +12192,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -11889,12 +12383,14 @@ ; SI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = 
G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-HSA-LABEL: name: test_load_global_v4s64_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; CI-MESA-LABEL: name: test_load_global_v4s64_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12040,6 +12536,7 @@ ; CI-MESA-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_global_v4s64_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12185,12 +12682,14 @@ ; VI-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[OR27]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12354,6 +12853,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; CI-HSA-LABEL: name: test_load_global_v2s128_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12361,6 +12861,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; CI-MESA-LABEL: name: test_load_global_v2s128_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12368,6 +12869,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; VI-LABEL: name: test_load_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12375,6 +12877,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12382,6 +12885,7 @@ ; GFX9-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12407,6 +12911,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12414,6 +12919,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12421,6 +12927,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12428,6 +12935,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12435,6 +12943,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12460,6 +12969,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12467,6 +12977,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12474,6 +12985,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12481,6 +12993,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 
(<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12488,6 +13001,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12513,6 +13027,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12520,6 +13035,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12527,6 +13043,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12534,6 +13051,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12541,6 +13059,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12628,6 +13147,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12635,6 +13155,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12704,6 +13225,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), 
[[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_global_v2p1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12773,6 +13295,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12780,6 +13303,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 1, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12867,6 +13391,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; CI-HSA-LABEL: name: test_load_global_v4p1_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -12874,6 +13399,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; CI-MESA-LABEL: name: test_load_global_v4p1_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -12881,6 +13407,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; VI-LABEL: name: test_load_global_v4p1_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -12888,6 +13415,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -12895,6 +13423,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), align 8, addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12919,30 +13448,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -12966,30 +13500,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: test_load_global_v2p3_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13047,12 +13586,14 @@ ; SI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; CI-HSA-LABEL: name: test_load_global_v2p3_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-MESA-LABEL: name: 
test_load_global_v2p3_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13093,6 +13634,7 @@ ; CI-MESA-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_global_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13133,12 +13675,14 @@ ; VI-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13196,30 +13740,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13243,30 +13792,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), 
align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13302,6 +13856,7 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13314,6 +13869,7 @@ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13332,6 +13888,7 @@ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13350,6 +13907,7 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13362,6 +13920,7 @@ ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13402,6 +13961,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13414,6 +13974,7 @@ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13426,6 +13987,7 @@ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13438,6 +14000,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; 
GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13450,6 +14013,7 @@ ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-HSA-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13479,30 +14043,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13528,6 +14097,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13535,6 +14105,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13542,6 +14113,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13549,6 +14121,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13556,6 +14129,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, 
addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13581,6 +14155,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13588,6 +14163,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13595,6 +14171,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13602,6 +14179,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13609,6 +14187,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13634,6 +14213,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13641,6 +14221,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13648,6 +14229,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13655,6 +14237,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13662,6 +14245,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; 
GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13690,6 +14274,7 @@ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13700,6 +14285,7 @@ ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13710,6 +14296,7 @@ ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13720,6 +14307,7 @@ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13730,6 +14318,7 @@ ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13758,6 +14347,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13765,6 +14355,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13772,6 +14363,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13779,6 +14371,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13786,6 +14379,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13811,6 +14405,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -13818,6 +14413,7 @@ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -13825,6 +14421,7 @@ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -13832,6 +14429,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -13839,6 +14437,7 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13863,30 +14462,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; 
GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13910,30 +14514,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -13957,30 +14566,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-MESA-LABEL: name: 
test_extload_global_v2s32_from_v2s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14004,30 +14618,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14051,30 +14670,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[LOAD]](<4 x s32>) + ; ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14194,6 +14818,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14208,6 +14833,7 @@ ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14310,6 +14936,7 @@ ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14412,6 +15039,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14426,6 +15054,7 @@ ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14594,6 +15223,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14608,6 +15238,7 @@ ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14660,6 +15291,7 @@ ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14712,6 +15344,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14726,6 +15359,7 @@ ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14815,6 +15449,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14829,6 +15464,7 @@ ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = 
COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14843,6 +15479,7 @@ ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14857,6 +15494,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14871,6 +15509,7 @@ ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -14920,6 +15559,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ -14934,6 +15574,7 @@ ; CI-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -14948,6 +15589,7 @@ ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_global_v2s96_align16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -14962,6 +15604,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -14976,6 +15619,7 @@ ; GFX9-HSA-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -15074,6 +15718,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} @@ 
-15144,6 +15789,7 @@ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -15214,6 +15860,7 @@ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; VI-LABEL: name: test_load_global_v32s1_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -15284,6 +15931,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32), [[LSHR7]](s32), [[LSHR8]](s32), [[LSHR9]](s32), [[LSHR10]](s32), [[LSHR11]](s32), [[LSHR12]](s32), [[LSHR13]](s32), [[LSHR14]](s32), [[LSHR15]](s32), [[LSHR16]](s32), [[LSHR17]](s32), [[LSHR18]](s32), [[LSHR19]](s32), [[LSHR20]](s32), [[LSHR21]](s32), [[LSHR22]](s32), [[LSHR23]](s32), [[LSHR24]](s32), [[LSHR25]](s32), [[LSHR26]](s32), [[LSHR27]](s32), [[LSHR28]](s32), [[LSHR29]](s32), [[LSHR30]](s32) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} @@ -15402,6 +16050,7 @@ ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>), [[BUILD_VECTOR8]](<2 x s16>), [[BUILD_VECTOR9]](<2 x s16>), [[BUILD_VECTOR10]](<2 x s16>), [[BUILD_VECTOR11]](<2 x s16>), [[BUILD_VECTOR12]](<2 x s16>), [[BUILD_VECTOR13]](<2 x s16>), [[BUILD_VECTOR14]](<2 x s16>), [[BUILD_VECTOR15]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC32:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC32]](<32 x s1>) + ; ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -15553,6 +16202,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), 
[[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
 ; SI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+ ;
 ; CI-HSA-LABEL: name: test_load_global_v8s4_align4
 ; CI-HSA: liveins: $vgpr0_vgpr1
 ; CI-HSA-NEXT: {{ $}}
@@ -15575,6 +16225,7 @@
 ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
 ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+ ;
 ; CI-MESA-LABEL: name: test_load_global_v8s4_align4
 ; CI-MESA: liveins: $vgpr0_vgpr1
 ; CI-MESA-NEXT: {{ $}}
@@ -15597,6 +16248,7 @@
 ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
 ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+ ;
 ; VI-LABEL: name: test_load_global_v8s4_align4
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -15619,6 +16271,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32), [[LSHR3]](s32), [[LSHR4]](s32), [[LSHR5]](s32), [[LSHR6]](s32)
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
 ; VI-NEXT: $vgpr0 = COPY [[TRUNC]](<8 x s4>)
+ ;
 ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: {{ $}}
@@ -15653,6 +16306,7 @@
 ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
 ; GFX9-HSA-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[TRUNC8]](<8 x s4>)
+ ;
 ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4
 ; GFX9-MESA: liveins: $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: {{ $}}
@@ -15705,30 +16359,35 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-HSA-LABEL: name: test_load_global_s32_align536870912
 ; CI-HSA: liveins: $vgpr0_vgpr1
 ; CI-HSA-NEXT: {{ $}}
 ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-MESA-LABEL: name: test_load_global_s32_align536870912
 ; CI-MESA: liveins: $vgpr0_vgpr1
 ; CI-MESA-NEXT: {{ $}}
 ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_global_s32_align536870912
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: {{ $}}
 ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912
 ; GFX9-MESA: liveins: $vgpr0_vgpr1
 ; GFX9-MESA-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -24,6 +24,7 @@
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s1_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -32,6 +33,7 @@
 ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s1_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -40,6 +42,7 @@
 ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s1_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -48,6 +51,7 @@
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s1_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -56,6 +60,7 @@
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -64,6 +69,7 @@
 ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s1_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -72,6 +78,7 @@
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -80,6 +87,7 @@
 ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s1_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -88,6 +96,7 @@
 ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s1_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -116,6 +125,7 @@
 ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; SI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s2_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -124,6 +134,7 @@
 ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s2_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -132,6 +143,7 @@
 ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s2_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -140,6 +152,7 @@
 ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; VI-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s2_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -148,6 +161,7 @@
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -156,6 +170,7 @@
 ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s2_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -164,6 +179,7 @@
 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -172,6 +188,7 @@
 ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s2_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -180,6 +197,7 @@
 ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
 ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s2_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -206,54 +224,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s8_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s8_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s8_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s8_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s8_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s8_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -278,54 +305,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s8_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s8_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s8_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s8_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s8_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s8_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -350,54 +386,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s16_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s16_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s16_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s16_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s16_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s16_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -422,54 +467,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s16_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s16_align2
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s16_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s16_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s16_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align2
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s16_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align2
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -500,6 +554,7 @@
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s16_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -512,6 +567,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s16_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -524,6 +580,7 @@
 ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s16_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -536,6 +593,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s16_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -548,12 +606,14 @@
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s16_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -566,12 +626,14 @@
 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s16_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -584,6 +646,7 @@
 ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -608,54 +671,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s32_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s32_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s32_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s32_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s32_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s32_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -685,6 +757,7 @@
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s32_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -697,6 +770,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s32_align2
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -709,6 +783,7 @@
 ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s32_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -721,6 +796,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s32_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -733,12 +809,14 @@
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s32_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -751,12 +829,14 @@
 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s32_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -769,6 +849,7 @@
 ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align2
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -808,6 +889,7 @@
 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s32_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -830,6 +912,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s32_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -852,6 +935,7 @@
 ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s32_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -874,6 +958,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s32_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -896,12 +981,14 @@
 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s32_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -924,12 +1011,14 @@
 ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s32_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -952,6 +1041,7 @@
 ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
 ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
 ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -975,54 +1065,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s24_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s24_align8
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s24_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s24_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s24_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align8
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s24_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align8
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1047,54 +1146,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s24_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s24_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s24_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s24_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s24_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s24_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
 ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1125,6 +1233,7 @@
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s24_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -1137,6 +1246,7 @@
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s24_align2
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -1149,6 +1259,7 @@
 ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s24_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -1161,6 +1272,7 @@
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s24_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -1173,6 +1285,7 @@
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -1185,6 +1298,7 @@
 ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s24_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -1197,6 +1311,7 @@
 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -1209,6 +1324,7 @@
 ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s24_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -1221,6 +1337,7 @@
 ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align2
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1263,6 +1380,7 @@
 ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; CI-LABEL: name: test_load_local_s24_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -1281,6 +1399,7 @@
 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s24_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -1299,6 +1418,7 @@
 ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; CI-DS128-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; VI-LABEL: name: test_load_local_s24_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -1317,6 +1437,7 @@
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; GFX9-LABEL: name: test_load_local_s24_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -1335,6 +1456,7 @@
 ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -1347,6 +1469,7 @@
 ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX10-LABEL: name: test_load_local_s24_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -1365,6 +1488,7 @@
 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -1377,6 +1501,7 @@
 ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
 ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
 ; GFX11-LABEL: name: test_load_local_s24_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -1395,6 +1520,7 @@
 ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32)
 ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]]
 ; GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1425,54 +1551,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-LABEL: name: test_load_local_s48_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s48_align8
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; VI-LABEL: name: test_load_local_s48_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_local_s48_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-LABEL: name: test_load_local_s48_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s48_align8
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-LABEL: name: test_load_local_s48_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s48_align8
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1497,54 +1632,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-LABEL: name: test_load_local_s64_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s64_align8
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; VI-LABEL: name: test_load_local_s64_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_local_s64_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-LABEL: name: test_load_local_s64_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align8
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-LABEL: name: test_load_local_s64_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3)
 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align8
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1568,54 +1712,63 @@
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-LABEL: name: test_load_local_s64_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
 ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s64_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
 ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; VI-LABEL: name: test_load_local_s64_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_local_s64_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-LABEL: name: test_load_local_s64_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
 ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-LABEL: name: test_load_local_s64_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3)
 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1657,6 +1810,7 @@
 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; CI-LABEL: name: test_load_local_s64_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -1681,6 +1835,7 @@
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s64_align2
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -1705,6 +1860,7 @@
 ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; VI-LABEL: name: test_load_local_s64_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -1729,6 +1885,7 @@
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_local_s64_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -1753,12 +1910,14 @@
 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-LABEL: name: test_load_local_s64_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -1783,6 +1942,7 @@
 ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -1797,6 +1957,7 @@
 ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
 ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
+ ;
 ; GFX11-LABEL: name: test_load_local_s64_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -1821,6 +1982,7 @@
 ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32)
 ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]]
 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align2
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -1880,6 +2042,7 @@
 ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; CI-LABEL: name: test_load_local_s64_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -1922,6 +2085,7 @@
 ; CI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s64_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -1964,6 +2128,7 @@
 ; CI-DS128-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; VI-LABEL: name: test_load_local_s64_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -2006,6 +2171,7 @@
 ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX9-LABEL: name: test_load_local_s64_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -2048,12 +2214,14 @@
 ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
 ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ;
 ; GFX10-LABEL: name: test_load_local_s64_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -2096,6 +2264,7 @@
 ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -2110,6 +2279,7 @@
 ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32)
 ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]]
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
+ ;
 ; GFX11-LABEL: name: test_load_local_s64_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -2152,6 +2322,7 @@
 ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32)
 ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]]
 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -2222,6 +2393,7 @@
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-LABEL: name: test_load_local_s96_align16
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -2275,6 +2447,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s96_align16
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -2328,6 +2501,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_local_s96_align16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -2381,6 +2555,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_local_s96_align16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -2434,6 +2609,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -2441,6 +2617,7 @@
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-LABEL: name: test_load_local_s96_align16
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -2494,6 +2671,7 @@
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -2508,6 +2686,7 @@
 ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-LABEL: name: test_load_local_s96_align16
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -2561,6 +2740,7 @@
 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align16
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -2591,6 +2771,7 @@
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-LABEL: name: test_load_local_s96_align8
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -2603,6 +2784,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s96_align8
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -2617,6 +2799,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_local_s96_align8
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -2631,6 +2814,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_local_s96_align8
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -2645,6 +2829,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -2652,6 +2837,7 @@
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-LABEL: name: test_load_local_s96_align8
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -2666,6 +2852,7 @@
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -2680,6 +2867,7 @@
 ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-LABEL: name: test_load_local_s96_align8
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -2694,6 +2882,7 @@
 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align8
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -2724,6 +2913,7 @@
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-LABEL: name: test_load_local_s96_align4
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -2736,6 +2926,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s96_align4
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -2750,6 +2941,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_local_s96_align4
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -2764,6 +2956,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_local_s96_align4
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -2778,6 +2971,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -2785,6 +2979,7 @@
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-LABEL: name: test_load_local_s96_align4
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -2799,6 +2994,7 @@
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -2813,6 +3009,7 @@
 ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-LABEL: name: test_load_local_s96_align4
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -2827,6 +3024,7 @@
 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align4
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -2873,6 +3071,7 @@
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-LABEL: name: test_load_local_s96_align2
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -2901,6 +3100,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s96_align2
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -2929,6 +3129,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_local_s96_align2
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -2957,6 +3158,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_local_s96_align2
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -2985,6 +3187,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -2992,6 +3195,7 @@
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-LABEL: name: test_load_local_s96_align2
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -3020,6 +3224,7 @@
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -3034,6 +3239,7 @@
 ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-LABEL: name: test_load_local_s96_align2
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -3062,6 +3268,7 @@
 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
 ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align2
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -3133,6 +3340,7 @@
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-LABEL: name: test_load_local_s96_align1
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -3186,6 +3394,7 @@
 ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s96_align1
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -3239,6 +3448,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; VI-LABEL: name: test_load_local_s96_align1
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -3292,6 +3502,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-LABEL: name: test_load_local_s96_align1
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -3345,6 +3556,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1
 ; GFX9-UNALIGNED: liveins: $vgpr0
 ; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -3352,6 +3564,7 @@
 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3)
 ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>)
 ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-LABEL: name: test_load_local_s96_align1
 ; GFX10: liveins: $vgpr0
 ; GFX10-NEXT: {{ $}}
@@ -3405,6 +3618,7 @@
 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1
 ; GFX10-UNALIGNED: liveins: $vgpr0
 ; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -3419,6 +3633,7 @@
 ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32)
 ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-LABEL: name: test_load_local_s96_align1
 ; GFX11: liveins: $vgpr0
 ; GFX11-NEXT: {{ $}}
@@ -3472,6 +3687,7 @@
 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32)
 ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
 ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
+ ;
 ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align1
 ; GFX11-UNALIGNED: liveins: $vgpr0
 ; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -3559,6 +3775,7 @@
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; CI-LABEL: name: test_load_local_s128_align16
 ; CI: liveins: $vgpr0
 ; CI-NEXT: {{ $}}
@@ -3628,6 +3845,7 @@
 ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; CI-DS128-LABEL: name: test_load_local_s128_align16
 ; CI-DS128: liveins: $vgpr0
 ; CI-DS128-NEXT: {{ $}}
@@ -3696,6 +3914,7 @@
 ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; VI-LABEL: name: test_load_local_s128_align16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -3764,6 +3983,7 @@
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-LABEL: name: test_load_local_s128_align16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -3832,6 +4052,7 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128)
+ ;
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16
 ; GFX9-UNALIGNED: liveins:
$vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -3839,6 +4060,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3907,6 +4129,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -3924,6 +4147,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3992,6 +4216,7 @@ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4021,6 +4246,7 @@ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4032,6 +4258,7 @@ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4039,6 +4266,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4046,6 +4274,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4053,6 +4282,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; 
GFX9-UNALIGNED-NEXT: {{ $}} @@ -4060,6 +4290,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4077,6 +4308,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4094,6 +4326,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4101,6 +4334,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4130,6 +4364,7 @@ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4141,6 +4376,7 @@ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4158,6 +4394,7 @@ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4175,6 +4412,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4192,6 +4430,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: 
test_load_local_s128_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4199,6 +4438,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4216,6 +4456,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4233,6 +4474,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4250,6 +4492,7 @@ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4304,6 +4547,7 @@ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4340,6 +4584,7 @@ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4375,6 +4620,7 @@ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4410,6 +4656,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4445,6 +4692,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; 
GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4452,6 +4700,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4487,6 +4736,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4504,6 +4754,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4539,6 +4790,7 @@ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -4626,6 +4878,7 @@ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_local_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4695,6 +4948,7 @@ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-DS128-LABEL: name: test_load_local_s128_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -4763,6 +5017,7 @@ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_local_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4831,6 +5086,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_local_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4899,6 +5155,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -4906,6 +5163,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_local_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4974,6 +5232,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -4991,6 +5250,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_local_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5059,6 +5319,7 @@ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5083,54 +5344,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9-UNALIGNED-NEXT: 
$vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5163,36 +5433,42 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5208,6 +5484,7 @@ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5223,12 +5500,14 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; 
GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5271,6 +5550,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5296,6 +5576,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-DS128-LABEL: name: test_load_local_p1_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5321,6 +5602,7 @@ ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5346,6 +5628,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5371,12 +5654,14 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5402,6 +5687,7 @@ ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5417,6 +5703,7 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5442,6 +5729,7 @@ ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5502,6 +5790,7 @@ ; SI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; CI-LABEL: name: test_load_local_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5545,6 +5834,7 @@ ; CI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; 
CI-DS128-LABEL: name: test_load_local_p1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5588,6 +5878,7 @@ ; CI-DS128-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; VI-LABEL: name: test_load_local_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5631,6 +5922,7 @@ ; VI-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-LABEL: name: test_load_local_p1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5674,12 +5966,14 @@ ; GFX9-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; ; GFX10-LABEL: name: test_load_local_p1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5723,6 +6017,7 @@ ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -5738,6 +6033,7 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-LABEL: name: test_load_local_p1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5781,6 +6077,7 @@ ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5804,54 +6101,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-LABEL: name: test_load_local_p3_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; 
GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -5882,6 +6188,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_local_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5895,6 +6202,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -5908,6 +6216,7 @@ ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5921,6 +6230,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5934,12 +6244,14 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5953,12 +6265,14 @@ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5972,6 +6286,7 @@ ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6012,6 +6327,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_local_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6035,6 +6351,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-DS128-LABEL: name: test_load_local_p3_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6058,6 +6375,7 @@ ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_local_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6081,6 +6399,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_local_p3_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6104,12 +6423,14 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_local_p3_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6133,12 +6454,14 @@ ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_local_p3_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6162,6 +6485,7 @@ ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6185,54 +6509,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align4 ; 
CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6263,6 +6596,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6276,6 +6610,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6289,6 +6624,7 @@ ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6302,6 +6638,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6315,12 +6652,14 @@ ; 
GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6334,12 +6673,14 @@ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6353,6 +6694,7 @@ ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6393,6 +6735,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_local_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6416,6 +6759,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-DS128-LABEL: name: test_load_local_p5_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6439,6 +6783,7 @@ ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-DS128-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-DS128-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_local_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6462,6 +6807,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_local_p5_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6485,12 +6831,14 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_local_p5_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6514,12 +6862,14 @@ ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; 
GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_local_p5_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6543,6 +6893,7 @@ ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6566,54 +6917,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_local_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_local_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6647,6 +7007,7 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6661,6 +7022,7 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s8_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -6675,6 +7037,7 @@ ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6689,6 +7052,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6703,6 +7067,7 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -6712,6 +7077,7 @@ ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6726,6 +7092,7 @@ ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -6735,6 +7102,7 @@ ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6749,6 +7117,7 @@ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -6780,8 +7149,8 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; SI-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6801,6 +7170,7 @@
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; SI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; CI-LABEL: name: test_load_local_v3s8_align4
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -6811,8 +7181,8 @@
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6832,6 +7202,7 @@
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; CI-DS128-LABEL: name: test_load_local_v3s8_align4
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -6842,8 +7213,8 @@
; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -6863,6 +7234,7 @@
; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; CI-DS128-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; VI-LABEL: name: test_load_local_v3s8_align4
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -6873,8 +7245,8 @@
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6892,6 +7264,7 @@
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; VI-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX9-LABEL: name: test_load_local_v3s8_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -6902,8 +7275,8 @@
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6921,6 +7294,7 @@
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -6931,8 +7305,8 @@
; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6950,6 +7324,7 @@
; GFX9-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX10-LABEL: name: test_load_local_v3s8_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -6960,8 +7335,8 @@
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -6979,6 +7354,7 @@
; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -6989,8 +7365,8 @@
; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7008,6 +7384,7 @@
; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX11-LABEL: name: test_load_local_v3s8_align4
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -7018,8 +7395,8 @@
; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7037,6 +7414,7 @@
; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align4
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -7047,8 +7425,8 @@
; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
@@ -7099,8 +7477,8 @@
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7120,6 +7498,7 @@
; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; SI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; CI-LABEL: name: test_load_local_v3s8_align1
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -7140,8 +7519,8 @@
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7161,6 +7540,7 @@
; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; CI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; CI-DS128-LABEL: name: test_load_local_v3s8_align1
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -7181,8 +7561,8 @@
; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -7202,6 +7582,7 @@
; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; CI-DS128-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; VI-LABEL: name: test_load_local_v3s8_align1
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -7222,8 +7603,8 @@
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7241,6 +7622,7 @@
; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; VI-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; GFX9-LABEL: name: test_load_local_v3s8_align1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -7261,8 +7643,8 @@
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7280,6 +7662,7 @@
; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -7295,8 +7678,8 @@
; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+ ; GFX9-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7314,6 +7697,7 @@
; GFX9-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
+ ;
; GFX10-LABEL: name: test_load_local_v3s8_align1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -7334,8 +7718,8 @@
; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7353,6 +7737,7 @@
; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -7368,8 +7753,8 @@
; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+ ; GFX10-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7387,6 +7772,7 @@
; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32)
+ ;
; GFX11-LABEL: name: test_load_local_v3s8_align1
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -7407,8 +7793,8 @@
; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32)
; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32)
; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+ ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
@@ -7426,6 +7812,7 @@
; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
; GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align1
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -7441,8 +7828,8 @@
; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32)
; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32)
; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
+ ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -7479,54 +7866,63 @@
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; CI-LABEL: name: test_load_local_v4s8_align4
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; CI-DS128-LABEL: name: test_load_local_v4s8_align4
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; VI-LABEL: name: test_load_local_v4s8_align4
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX9-LABEL: name: test_load_local_v4s8_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX10-LABEL: name: test_load_local_v4s8_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX11-LABEL: name: test_load_local_v4s8_align4
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3)
; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s8_align4
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -7551,54 +7947,63 @@
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; CI-LABEL: name: test_load_local_v8s8_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v8s8_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; VI-LABEL: name: test_load_local_v8s8_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_load_local_v8s8_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX10-LABEL: name: test_load_local_v8s8_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX11-LABEL: name: test_load_local_v8s8_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s8_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -7683,6 +8088,7 @@
; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; CI-LABEL: name: test_load_local_v16s8_align16
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -7749,6 +8155,7 @@
; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v16s8_align16
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -7816,6 +8223,7 @@
; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; VI-LABEL: name: test_load_local_v16s8_align16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -7883,6 +8291,7 @@
; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX9-LABEL: name: test_load_local_v16s8_align16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -7950,12 +8359,14 @@
; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ;
; GFX10-LABEL: name: test_load_local_v16s8_align16
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -8023,6 +8434,7 @@
; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -8039,6 +8451,7 @@
; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3)
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX11-LABEL: name: test_load_local_v16s8_align16
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -8106,6 +8519,7 @@
; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]]
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s8_align16
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -8130,54 +8544,63 @@
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v2s16_align4
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v2s16_align4
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v2s16_align4
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v2s16_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v2s16_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v2s16_align4
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3)
; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align4
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -8211,6 +8634,7 @@
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v2s16_align2
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -8227,6 +8651,7 @@
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v2s16_align2
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -8243,6 +8668,7 @@
; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v2s16_align2
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -8259,6 +8685,7 @@
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v2s16_align2
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -8271,12 +8698,14 @@
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v2s16_align2
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -8289,12 +8718,14 @@
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v2s16_align2
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -8307,6 +8738,7 @@
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align2
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -8350,6 +8782,7 @@
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v2s16_align1
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -8376,6 +8809,7 @@
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v2s16_align1
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -8402,6 +8836,7 @@
; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-DS128-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v2s16_align1
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -8428,6 +8863,7 @@
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]]
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v2s16_align1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -8450,12 +8886,14 @@
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v2s16_align1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -8478,12 +8916,14 @@
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v2s16_align1
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -8506,6 +8946,7 @@
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align1
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -8542,13 +8983,13 @@
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v3s16_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -8568,13 +9009,13 @@
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v3s16_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -8594,13 +9035,13 @@
; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v3s16_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -8620,13 +9061,13 @@
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v3s16_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -8648,6 +9089,7 @@
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -8669,6 +9111,7 @@
; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v3s16_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -8690,6 +9133,7 @@
; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -8711,6 +9155,7 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v3s16_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -8732,6 +9177,7 @@
; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -8794,13 +9240,13 @@
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v3s16_align2
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -8829,13 +9275,13 @@
; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v3s16_align2
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -8864,13 +9310,13 @@
; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v3s16_align2
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -8899,13 +9345,13 @@
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v3s16_align2
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -8934,6 +9380,7 @@
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -8962,6 +9409,7 @@
; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v3s16_align2
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -8990,6 +9438,7 @@
; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -9018,6 +9467,7 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v3s16_align2
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -9046,6 +9496,7 @@
; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -9129,13 +9580,13 @@
; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v3s16_align1
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -9178,13 +9629,13 @@
; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v3s16_align1
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -9227,13 +9678,13 @@
; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; CI-DS128-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v3s16_align1
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -9276,13 +9727,13 @@
; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]]
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v3s16_align1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -9325,6 +9776,7 @@
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
@@ -9353,6 +9805,7 @@
; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v3s16_align1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -9395,6 +9848,7 @@
; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -9423,6 +9877,7 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v3s16_align1
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -9465,6 +9920,7 @@
; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align1
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -9511,54 +9967,63 @@
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v4s16_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v4s16_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v4s16_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v4s16_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v4s16_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v4s16_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align8
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -9604,36 +10069,42 @@
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v4s16_align4
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v4s16_align4
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v4s16_align4
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v4s16_align4
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v4s16_align4
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -9656,6 +10127,7 @@
; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -9678,12 +10150,14 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v4s16_align4
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align4
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -9728,6 +10202,7 @@
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v4s16_align2
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -9756,6 +10231,7 @@
; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v4s16_align2
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -9784,6 +10260,7 @@
; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v4s16_align2
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -9812,6 +10289,7 @@
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v4s16_align2
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -9834,12 +10312,14 @@
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v4s16_align2
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -9862,6 +10342,7 @@
; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -9884,6 +10365,7 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v4s16_align2
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -9906,6 +10388,7 @@
; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align2
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -9969,6 +10452,7 @@
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; CI-LABEL: name: test_load_local_v4s16_align1
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
@@ -10015,6 +10499,7 @@
; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v4s16_align1
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
@@ -10061,6 +10546,7 @@
; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_load_local_v4s16_align1
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -10107,6 +10593,7 @@
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_load_local_v4s16_align1
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -10147,12 +10634,14 @@
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ;
; GFX10-LABEL: name: test_load_local_v4s16_align1
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
@@ -10193,6 +10682,7 @@
; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
@@ -10215,6 +10705,7 @@
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX11-LABEL: name: test_load_local_v4s16_align1
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -10255,6 +10746,7 @@
; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align1
; GFX11-UNALIGNED: liveins: $vgpr0
; GFX11-UNALIGNED-NEXT: {{ $}}
@@ -10278,54 +10770,63 @@
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; CI-LABEL: name: test_load_local_v2s32_align8
; CI: liveins: $vgpr0
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; CI-DS128-LABEL: name: test_load_local_v2s32_align8
; CI-DS128: liveins: $vgpr0
; CI-DS128-NEXT: {{ $}}
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; VI-LABEL: name: test_load_local_v2s32_align8
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_load_local_v2s32_align8
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8
; GFX9-UNALIGNED: liveins: $vgpr0
; GFX9-UNALIGNED-NEXT: {{ $}}
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX10-LABEL: name: test_load_local_v2s32_align8
; GFX10: liveins: $vgpr0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align8
; GFX10-UNALIGNED: liveins: $vgpr0
; GFX10-UNALIGNED-NEXT: {{ $}}
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ;
; GFX11-LABEL: name: test_load_local_v2s32_align8
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD
[[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10349,54 +10850,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10434,6 +10944,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10454,6 
+10965,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -10474,6 +10986,7 @@ ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10494,6 +11007,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10514,12 +11028,14 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10540,6 +11056,7 @@ ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -10550,6 +11067,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10570,6 +11088,7 @@ ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10625,6 +11144,7 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_local_v2s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10663,6 +11183,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-DS128-LABEL: name: 
test_load_local_v2s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -10701,6 +11222,7 @@ ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_local_v2s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10739,6 +11261,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v2s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10777,12 +11300,14 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v2s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10821,6 +11346,7 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -10831,6 +11357,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v2s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10869,6 +11396,7 @@ ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -10938,6 +11466,7 @@ ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_local_v3s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10990,6 +11519,7 @@ ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11042,6 +11572,7 @@ ; CI-DS128-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), 
[[OR5]](s32), [[OR8]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_local_v3s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11094,6 +11625,7 @@ ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v3s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11146,12 +11678,14 @@ ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v3s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11204,6 +11738,7 @@ ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11217,6 +11752,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v3s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11269,6 +11805,7 @@ ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11297,6 +11834,7 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_local_v3s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11308,6 +11846,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11321,6 +11860,7 @@ ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_local_v3s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11334,6 +11874,7 @@ ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v3s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11347,12 +11888,14 @@ ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v3s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11366,6 +11909,7 @@ ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11379,6 +11923,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v3s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11392,6 +11937,7 @@ ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11419,6 +11965,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11429,48 +11976,56 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x 
s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11498,6 +12053,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11508,30 +12064,35 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; CI-DS128-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11548,6 +12109,7 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11564,12 +12126,14 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11597,6 +12161,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11607,6 +12172,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s32>) from unknown-address + 8, align 4, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11623,6 +12189,7 @@ ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; CI-DS128-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11639,6 +12206,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11655,12 +12223,14 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11677,6 +12247,7 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11693,6 +12264,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11709,6 +12281,7 @@ ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -11761,6 +12334,7 @@ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11796,6 +12370,7 @@ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -11830,6 +12405,7 @@ ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11864,6 +12440,7 @@ ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11898,12 +12475,14 @@ ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11938,6 +12517,7 @@ ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -11954,6 +12534,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11988,6 +12569,7 @@ ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12073,6 +12655,7 @@ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-LABEL: name: test_load_local_v4s32_align1 ; CI: liveins: $vgpr0 ; 
CI-NEXT: {{ $}} @@ -12141,6 +12724,7 @@ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s32_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12208,6 +12792,7 @@ ; CI-DS128-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_local_v4s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12275,6 +12860,7 @@ ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v4s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12342,12 +12928,14 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v4s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12415,6 +13003,7 @@ ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12431,6 +13020,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v4s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12498,6 +13088,7 @@ ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12531,6 +13122,7 @@ ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x 
s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; CI-LABEL: name: test_load_local_v8s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12547,6 +13139,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<2 x s32>) from unknown-address + 24, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v8s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12557,6 +13150,7 @@ ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; VI-LABEL: name: test_load_local_v8s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12567,6 +13161,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v8s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12577,6 +13172,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -12587,6 +13183,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v8s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12597,6 +13194,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12607,6 +13205,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v8s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12617,6 +13216,7 @@ ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12666,6 +13266,7 @@ ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; CI-LABEL: name: test_load_local_v16s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12694,6 +13295,7 @@ ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (<2 x s32>) from unknown-address + 56, addrspace 3) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; CI-DS128-LABEL: name: test_load_local_v16s32_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12710,6 +13312,7 @@ ; CI-DS128-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; VI-LABEL: name: test_load_local_v16s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12726,6 +13329,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX9-LABEL: name: test_load_local_v16s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12742,6 +13346,7 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; 
GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -12758,6 +13363,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX10-LABEL: name: test_load_local_v16s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12774,6 +13380,7 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12790,6 +13397,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX11-LABEL: name: test_load_local_v16s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12806,6 +13414,7 @@ ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -12843,6 +13452,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_local_v2s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12853,6 +13463,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -12863,6 +13474,7 @@ ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI-DS128-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_local_v2s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12873,6 +13485,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v2s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12883,12 +13496,14 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v2s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12899,6 +13514,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -12909,6 +13525,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v2s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12919,6 +13536,7 @@ ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13012,6 +13630,7 @@ ; SI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_local_v2s64_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13088,6 +13707,7 @@ ; CI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v2s64_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13164,6 
+13784,7 @@ ; CI-DS128-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_local_v2s64_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13240,6 +13861,7 @@ ; VI-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v2s64_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13316,12 +13938,14 @@ ; GFX9-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v2s64_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13398,6 +14022,7 @@ ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13423,6 +14048,7 @@ ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v2s64_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13499,6 +14125,7 @@ ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13531,6 +14158,7 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13546,6 +14174,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: 
liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13559,6 +14188,7 @@ ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13572,6 +14202,7 @@ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13585,6 +14216,7 @@ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13598,6 +14230,7 @@ ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13611,6 +14244,7 @@ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13624,6 +14258,7 @@ ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v3s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13637,6 +14272,7 @@ ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: 
test_load_local_v3s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13679,6 +14315,7 @@ ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_local_v4s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13695,6 +14332,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-DS128-LABEL: name: test_load_local_v4s64_align32 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13705,6 +14343,7 @@ ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; VI-LABEL: name: test_load_local_v4s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13715,6 +14354,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_local_v4s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13725,6 +14365,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13735,6 +14376,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_local_v4s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13745,6 +14387,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13755,6 +14398,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) 
:: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_local_v4s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13765,6 +14409,7 @@ ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13797,6 +14442,7 @@ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-LABEL: name: test_load_local_v2p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -13808,6 +14454,7 @@ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-DS128-LABEL: name: test_load_local_v2p1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -13825,6 +14472,7 @@ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_local_v2p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -13842,6 +14490,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_local_v2p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -13859,6 +14508,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -13866,6 +14516,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX10-LABEL: name: test_load_local_v2p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -13883,6 +14534,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; 
GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -13900,6 +14552,7 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX11-LABEL: name: test_load_local_v2p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -13917,6 +14570,7 @@ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -13941,54 +14595,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-LABEL: name: test_load_local_v2p3_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; CI-DS128-LABEL: name: test_load_local_v2p3_align8 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; VI-LABEL: name: test_load_local_v2p3_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_local_v2p3_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-LABEL: name: test_load_local_v2p3_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX11-LABEL: name: test_load_local_v2p3_align8 ; 
GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14012,54 +14675,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_extload_local_s32_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_extload_local_s32_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14083,54 +14755,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_extload_local_s32_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; 
CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-DS128-LABEL: name: test_extload_local_s32_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_extload_local_s32_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14156,6 +14837,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14163,6 +14845,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14170,6 +14853,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14177,6 +14861,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14184,6 +14869,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14191,6 +14877,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14198,6 +14885,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14205,6 +14893,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14212,6 +14901,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14237,6 +14927,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14244,6 +14935,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14251,6 +14943,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14258,6 +14951,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14265,6 +14959,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14272,6 +14967,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14279,6 +14975,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14286,6 +14983,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14293,6 +14991,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14318,6 +15017,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14325,6 +15025,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14332,6 +15033,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14339,6 +15041,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14346,6 +15049,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14353,6 +15057,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14360,6 +15065,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14367,6 +15073,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14374,6 +15081,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14402,6 +15110,7 @@ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-LABEL: name: test_extload_local_s128_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14412,6 +15121,7 @@ ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14422,6 +15132,7 @@ ; CI-DS128-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_extload_local_s128_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14432,6 +15143,7 @@ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14442,6 +15154,7 @@ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14452,6 +15165,7 @@ ; GFX9-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX10-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14462,6 +15176,7 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; 
GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14472,6 +15187,7 @@ ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX11-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14482,6 +15198,7 @@ ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14510,6 +15227,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_2_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14517,6 +15235,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14524,6 +15243,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_2_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14531,6 +15251,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14538,6 +15259,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14545,6 +15267,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14552,6 +15275,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14559,6 +15283,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14566,6 +15291,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14591,6 +15317,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_extload_local_s64_from_1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -14598,6 +15325,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -14605,6 +15333,7 @@ ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_extload_local_s64_from_1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -14612,6 +15341,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -14619,6 +15349,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -14626,6 +15357,7 @@ ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -14633,6 +15365,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -14640,6 +15373,7 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -14647,6 +15381,7 @@ ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; 
GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14671,54 +15406,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14742,54 +15486,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; 
CI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14813,54 +15566,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14884,54 +15646,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9: 
liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -14955,54 +15726,63 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-DS128-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; 
GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15121,6 +15901,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15222,6 +16003,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15323,6 +16105,7 @@ ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -15424,6 +16207,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -15525,6 +16309,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -15539,6 +16324,7 @@ ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -15640,6 +16426,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: 
liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -15666,6 +16453,7 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -15767,6 +16555,7 @@ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -15847,6 +16636,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -15899,6 +16689,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -15951,6 +16742,7 @@ ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16003,6 +16795,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16055,6 +16848,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16069,6 +16863,7 @@ ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16121,6 +16916,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16147,6 +16943,7 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16199,6 +16996,7 @@ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16250,6 +17048,7 @@ ; SI-NEXT: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16273,6 +17072,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16299,6 +17099,7 @@ ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16325,6 +17126,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16351,6 +17153,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16365,6 +17168,7 @@ ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16391,6 +17195,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16417,6 +17222,7 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16443,6 +17249,7 @@ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} @@ -16494,6 +17301,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -16517,6 +17325,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} @@ -16538,6 +17347,7 @@ ; CI-DS128-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; 
VI-LABEL: name: test_load_local_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -16559,6 +17369,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -16580,6 +17391,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} @@ -16594,6 +17406,7 @@ ; GFX9-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -16615,6 +17428,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} @@ -16636,6 +17450,7 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_local_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -16657,6 +17472,7 @@ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -20,6 +20,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_private_s1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -28,6 +29,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_private_s1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -36,6 +38,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_private_s1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -44,6 +47,7 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_private_s1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -52,6 +56,7 @@ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_private_s1_align1 ; 
GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -80,6 +85,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; SI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; CI-LABEL: name: test_load_private_s2_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -88,6 +94,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; CI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; VI-LABEL: name: test_load_private_s2_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -96,6 +103,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; VI-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX9-LABEL: name: test_load_private_s2_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -104,6 +112,7 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX10-LABEL: name: test_load_private_s2_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -112,6 +121,7 @@ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX11-LABEL: name: test_load_private_s2_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -138,30 +148,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -186,30 +201,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s8_align1 ; 
GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -234,30 +254,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -282,30 +307,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -336,6 +366,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; 
SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -348,6 +379,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -360,6 +392,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -372,6 +405,7 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -384,6 +418,7 @@ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -408,30 +443,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -461,6 +501,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -473,6 +514,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -485,6 +527,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -497,6 +540,7 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], 
[[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -509,6 +553,7 @@ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -548,6 +593,7 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-LABEL: name: test_load_private_s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -570,6 +616,7 @@ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_private_s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -592,6 +639,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_private_s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -614,6 +662,7 @@ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX10-LABEL: name: test_load_private_s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -636,6 +685,7 @@ ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX11-LABEL: name: test_load_private_s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -659,30 +709,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -707,30 +762,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: 
test_load_private_s24_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -761,6 +821,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -773,6 +834,7 @@ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -785,6 +847,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -797,6 +860,7 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -809,6 +873,7 @@ ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -851,6 +916,7 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; SI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; CI-LABEL: name: test_load_private_s24_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -869,6 +935,7 @@ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; VI-LABEL: name: test_load_private_s24_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -887,6 +954,7 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; VI-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX9-LABEL: name: test_load_private_s24_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -905,6 +973,7 @@ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX9-NEXT: 
$vgpr0 = COPY [[OR1]](s32) + ; ; GFX10-LABEL: name: test_load_private_s24_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -923,6 +992,7 @@ ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; ; GFX11-LABEL: name: test_load_private_s24_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -959,15 +1029,15 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s48_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -980,15 +1050,15 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s48_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1001,15 +1071,15 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s48_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1022,15 +1092,15 @@ ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s48_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1043,15 +1113,15 @@ ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s48_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1080,6 +1150,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1090,6 +1161,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1100,6 +1172,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1110,6 +1183,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1120,6 
+1194,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1147,6 +1222,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1157,6 +1233,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1167,6 +1244,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1177,6 +1255,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1187,6 +1266,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1224,6 +1304,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1244,6 +1325,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1264,6 +1346,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1284,6 +1367,7 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1304,6 +1388,7 @@ ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: 
$vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1359,6 +1444,7 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; CI-LABEL: name: test_load_private_s64_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1397,6 +1483,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; VI-LABEL: name: test_load_private_s64_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1435,6 +1522,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_load_private_s64_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1473,6 +1561,7 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX10-LABEL: name: test_load_private_s64_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1511,6 +1600,7 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX11-LABEL: name: test_load_private_s64_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1581,6 +1671,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1634,6 +1725,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1687,6 +1779,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1740,6 +1833,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1793,6 +1887,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1825,6 +1920,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), 
[[LOAD2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1839,6 +1935,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1853,6 +1950,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1867,6 +1965,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1881,6 +1980,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -1913,6 +2013,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -1927,6 +2028,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -1941,6 +2043,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -1955,6 +2058,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -1969,6 +2073,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2015,6 +2120,7 @@ ; 
SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2043,6 +2149,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2071,6 +2178,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2099,6 +2207,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2127,6 +2236,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2198,6 +2308,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; CI-LABEL: name: test_load_private_s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2251,6 +2362,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; VI-LABEL: name: test_load_private_s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2304,6 +2416,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX9-LABEL: name: test_load_private_s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2357,6 +2470,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX10-LABEL: name: test_load_private_s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2410,6 +2524,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; ; GFX11-LABEL: name: test_load_private_s96_align1 ; GFX11: liveins: $vgpr0 ; 
GFX11-NEXT: {{ $}} @@ -2496,6 +2611,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2564,6 +2680,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2632,6 +2749,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2700,6 +2818,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2768,6 +2887,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2803,6 +2923,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2820,6 +2941,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2837,6 +2959,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2854,6 +2977,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2871,6 +2995,7 @@ ; GFX10-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -2906,6 +3031,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -2923,6 +3049,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -2940,6 +3067,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -2957,6 +3085,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -2974,6 +3103,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3027,6 +3157,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3062,6 +3193,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3097,6 +3229,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3132,6 +3265,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), 
[[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3167,6 +3301,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3253,6 +3388,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; CI-LABEL: name: test_load_private_s128_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3321,6 +3457,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; VI-LABEL: name: test_load_private_s128_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3389,6 +3526,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX9-LABEL: name: test_load_private_s128_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3457,6 +3595,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX10-LABEL: name: test_load_private_s128_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3525,6 +3664,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; ; GFX11-LABEL: name: test_load_private_s128_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3553,6 +3693,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3563,6 +3704,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3573,6 +3715,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; 
; GFX9-LABEL: name: test_load_private_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3583,6 +3726,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3593,6 +3737,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3620,6 +3765,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3630,6 +3776,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3640,6 +3787,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3650,6 +3798,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3660,6 +3809,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3697,6 +3847,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3717,6 +3868,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3737,6 +3889,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3757,6 +3910,7 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3777,6 +3931,7 @@ ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -3832,6 +3987,7 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; CI-LABEL: name: test_load_private_p1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -3870,6 +4026,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; VI-LABEL: name: test_load_private_p1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -3908,6 +4065,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX9-LABEL: name: test_load_private_p1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -3946,6 +4104,7 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX10-LABEL: name: test_load_private_p1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -3984,6 +4143,7 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; ; GFX11-LABEL: name: test_load_private_p1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4007,30 +4167,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4061,6 +4226,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; 
SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4074,6 +4240,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4087,6 +4254,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4100,6 +4268,7 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4113,6 +4282,7 @@ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4153,6 +4323,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; CI-LABEL: name: test_load_private_p3_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4176,6 +4347,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; VI-LABEL: name: test_load_private_p3_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4199,6 +4371,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX9-LABEL: name: test_load_private_p3_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4222,6 +4395,7 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX10-LABEL: name: test_load_private_p3_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4245,6 +4419,7 @@ ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; ; GFX11-LABEL: name: test_load_private_p3_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4268,30 +4443,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = 
COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4322,6 +4502,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4335,6 +4516,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4348,6 +4530,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4361,6 +4544,7 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4374,6 +4558,7 @@ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4414,6 +4599,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; SI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; CI-LABEL: name: test_load_private_p5_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4437,6 +4623,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; VI-LABEL: name: test_load_private_p5_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4460,6 +4647,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; VI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX9-LABEL: name: test_load_private_p5_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4483,6 +4671,7 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX10-LABEL: name: test_load_private_p5_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4506,6 +4695,7 @@ ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; ; GFX11-LABEL: name: test_load_private_p5_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4529,30 +4719,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI-NEXT: $vgpr0 = COPY 
[[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4586,6 +4781,7 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4600,6 +4796,7 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4614,6 +4811,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4628,6 +4826,7 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4642,6 +4841,7 @@ ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4673,8 +4873,8 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4694,6 +4894,7 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT]], [[SHL2]] ; SI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; CI-LABEL: name: test_load_private_v3s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4704,8 +4905,8 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4725,6 +4926,7 @@ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; VI-LABEL: name: test_load_private_v3s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4735,8 +4937,8 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4754,6 +4956,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_load_private_v3s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4764,8 +4967,8 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4783,6 +4986,7 @@ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX10-LABEL: name: test_load_private_v3s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -4793,8 +4997,8 @@ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4812,6 +5016,7 @@ ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX11-LABEL: name: test_load_private_v3s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -4822,8 +5027,8 @@ ; 
GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] @@ -4874,8 +5079,8 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4895,6 +5100,7 @@ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; SI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; CI-LABEL: name: test_load_private_v3s8_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -4915,8 +5121,8 @@ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -4936,6 +5142,7 @@ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; CI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; VI-LABEL: name: test_load_private_v3s8_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -4956,8 +5163,8 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -4975,6 +5182,7 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX9-LABEL: name: test_load_private_v3s8_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -4995,8 +5203,8 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; 
GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -5014,6 +5222,7 @@ ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX10-LABEL: name: test_load_private_v3s8_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5034,8 +5243,8 @@ ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] @@ -5053,6 +5262,7 @@ ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; ; GFX11-LABEL: name: test_load_private_v3s8_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5068,8 +5278,8 @@ ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -5106,30 +5316,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_load_private_v4s8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_load_private_v4s8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_load_private_v4s8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5158,6 +5373,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v8s8_align8 ; 
CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5168,6 +5384,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5178,6 +5395,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5188,6 +5406,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v8s8_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5198,6 +5417,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v8s8_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5283,6 +5503,7 @@ ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5350,6 +5571,7 @@ ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5417,6 +5639,7 @@ ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5484,6 +5707,7 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v16s8_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5551,6 +5775,7 @@ ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v16s8_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5575,30 +5800,35 @@ ; SI-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5632,6 +5862,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5648,6 +5879,7 @@ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5664,6 +5896,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5676,6 +5909,7 @@ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5688,6 +5922,7 @@ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5731,6 +5966,7 @@ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; CI-LABEL: name: test_load_private_v2s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5757,6 +5993,7 @@ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; VI-LABEL: name: test_load_private_v2s16_align1 ; VI: 
liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5783,6 +6020,7 @@ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v2s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5805,6 +6043,7 @@ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v2s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -5827,6 +6066,7 @@ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v2s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -5862,22 +6102,21 @@ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -5896,22 +6135,21 @@ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], 
[[C2]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -5930,22 +6168,21 @@ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -5973,6 +6210,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), 
[[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v3s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6000,6 +6238,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v3s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6062,13 +6301,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6097,13 +6336,13 @@ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6132,13 +6371,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6167,6 +6406,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v3s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6195,6 +6435,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v3s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6278,13 +6519,13 @@ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6327,13 +6568,13 @@ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6376,13 +6617,13 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6425,6 +6666,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v3s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6467,6 +6709,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v3s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6517,6 +6760,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-LABEL: name: test_load_private_v4s16_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6527,6 +6771,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_private_v4s16_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6537,6 +6782,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v4s16_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6547,6 +6793,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v4s16_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6557,6 +6804,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, 
addrspace 5) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v4s16_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6584,6 +6832,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-LABEL: name: test_load_private_v4s16_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6594,6 +6843,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_private_v4s16_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6604,6 +6854,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v4s16_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6614,6 +6865,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v4s16_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6624,6 +6876,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v4s16_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6667,6 +6920,7 @@ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-LABEL: name: test_load_private_v4s16_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6694,6 +6948,7 @@ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_private_v4s16_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6721,6 +6976,7 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v4s16_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6742,6 +6998,7 @@ ; GFX9-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v4s16_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6763,6 +7020,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v4s16_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -6825,6 +7083,7 @@ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; CI-LABEL: name: test_load_private_v4s16_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -6870,6 +7129,7 @@ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_load_private_v4s16_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -6915,6 +7175,7 @@ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_load_private_v4s16_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -6954,6 +7215,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX10-LABEL: name: test_load_private_v4s16_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -6993,6 +7255,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX11-LABEL: name: test_load_private_v4s16_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7020,6 +7283,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7030,6 +7294,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7040,6 +7305,7 @@ ; VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7050,6 +7316,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7060,6 +7327,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7087,6 +7355,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7097,6 +7366,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7107,6 +7377,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7117,6 +7388,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7127,6 +7399,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7164,6 +7437,7 @@ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7184,6 +7458,7 @@ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; CI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7204,6 +7479,7 @@ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7224,6 +7500,7 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7244,6 +7521,7 @@ ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7299,6 +7577,7 @@ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; CI-LABEL: name: test_load_private_v2s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7337,6 +7616,7 @@ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_load_private_v2s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7375,6 +7655,7 @@ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v2s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7413,6 +7694,7 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v2s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7451,6 +7733,7 @@ ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v2s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7520,6 +7803,7 @@ ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_private_v3s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7572,6 +7856,7 @@ ; CI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: 
name: test_load_private_v3s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7624,6 +7909,7 @@ ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v3s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7676,6 +7962,7 @@ ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v3s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7728,6 +8015,7 @@ ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v3s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7758,6 +8046,7 @@ ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; CI-LABEL: name: test_load_private_v3s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7771,6 +8060,7 @@ ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_load_private_v3s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -7784,6 +8074,7 @@ ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v3s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -7797,6 +8088,7 @@ ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v3s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -7810,6 +8102,7 @@ ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v3s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -7894,6 +8187,7 @@ ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: 
test_load_private_v4s32_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -7961,6 +8255,7 @@ ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v4s32_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8028,6 +8323,7 @@ ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -8095,6 +8391,7 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v4s32_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -8162,6 +8459,7 @@ ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v4s32_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -8195,6 +8493,7 @@ ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v4s32_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -8211,6 +8510,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v4s32_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8227,6 +8527,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v4s32_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -8243,6 +8544,7 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v4s32_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -8259,6 +8561,7 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v4s32_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -8292,6 +8595,7 @@ ; SI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v4s32_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -8308,6 +8612,7 @@ ; CI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v4s32_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8324,6 +8629,7 @@ ; VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v4s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -8340,6 +8646,7 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v4s32_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -8356,6 +8663,7 @@ ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v4s32_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -8407,6 +8715,7 @@ ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v4s32_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -8441,6 +8750,7 @@ ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v4s32_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8475,6 +8785,7 @@ ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v4s32_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ 
-8509,6 +8820,7 @@ ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v4s32_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -8543,6 +8855,7 @@ ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v4s32_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -8627,6 +8940,7 @@ ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; CI-LABEL: name: test_load_private_v4s32_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -8694,6 +9008,7 @@ ; CI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; VI-LABEL: name: test_load_private_v4s32_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8761,6 +9076,7 @@ ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v4s32_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -8828,6 +9144,7 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v4s32_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -8895,6 +9212,7 @@ ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v4s32_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -8940,6 +9258,7 @@ ; SI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; CI-LABEL: name: test_load_private_v8s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -8968,6 +9287,7 @@ ; CI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; VI-LABEL: name: test_load_private_v8s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -8996,6 +9316,7 @@ ; VI-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v8s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -9024,6 +9345,7 @@ ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v8s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -9052,6 +9374,7 @@ ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v8s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -9125,6 +9448,7 @@ ; SI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; CI-LABEL: name: test_load_private_v16s32_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -9177,6 +9501,7 @@ ; CI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; VI-LABEL: name: test_load_private_v16s32_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -9229,6 +9554,7 @@ ; VI-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), 
[[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; GFX9-LABEL: name: test_load_private_v16s32_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -9281,6 +9607,7 @@ ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; GFX10-LABEL: name: test_load_private_v16s32_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -9333,6 +9660,7 @@ ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; ; GFX11-LABEL: name: test_load_private_v16s32_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -9377,6 +9705,7 @@ ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_private_v2s64_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -9394,6 +9723,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_private_v2s64_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -9411,6 +9741,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_private_v2s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -9428,6 +9759,7 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_private_v2s64_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -9445,6 +9777,7 @@ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_private_v2s64_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -9530,6 +9863,7 @@ ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; CI-LABEL: name: test_load_private_v2s64_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -9598,6 +9932,7 @@ ; CI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_load_private_v2s64_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -9666,6 +10001,7 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_load_private_v2s64_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -9734,6 +10070,7 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_load_private_v2s64_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -9802,6 +10139,7 @@ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX11-LABEL: name: test_load_private_v2s64_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -9844,6 +10182,7 @@ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -9869,6 +10208,7 @@ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -9894,6 +10234,7 @@ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -9919,6 +10260,7 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x 
s64>) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_private_v3s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -9944,6 +10286,7 @@ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_private_v3s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -9999,6 +10342,7 @@ ; SI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; CI-LABEL: name: test_load_private_v4s64_align32 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10028,6 +10372,7 @@ ; CI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_load_private_v4s64_align32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10057,6 +10402,7 @@ ; VI-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_load_private_v4s64_align32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10086,6 +10432,7 @@ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX10-LABEL: name: test_load_private_v4s64_align32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10115,6 +10462,7 @@ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX11-LABEL: name: test_load_private_v4s64_align32 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10153,6 +10501,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; CI-LABEL: name: test_load_private_v2p1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10170,6 +10519,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x 
s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; VI-LABEL: name: test_load_private_v2p1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10187,6 +10537,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX9-LABEL: name: test_load_private_v2p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10204,6 +10555,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX10-LABEL: name: test_load_private_v2p1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10221,6 +10573,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; ; GFX11-LABEL: name: test_load_private_v2p1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10268,6 +10621,7 @@ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; CI-LABEL: name: test_load_private_v4p1_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10297,6 +10651,7 @@ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; VI-LABEL: name: test_load_private_v4p1_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10326,6 +10681,7 @@ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX9-LABEL: name: test_load_private_v4p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10355,6 +10711,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX10-LABEL: name: test_load_private_v4p1_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10384,6 +10741,7 @@ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX10-NEXT: 
[[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; ; GFX11-LABEL: name: test_load_private_v4p1_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10416,6 +10774,7 @@ ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; CI-LABEL: name: test_load_private_v2p3_align8 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10426,6 +10785,7 @@ ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; VI-LABEL: name: test_load_private_v2p3_align8 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10436,6 +10796,7 @@ ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX9-LABEL: name: test_load_private_v2p3_align8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10446,6 +10807,7 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX10-LABEL: name: test_load_private_v2p3_align8 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10456,6 +10818,7 @@ ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; ; GFX11-LABEL: name: test_load_private_v2p3_align8 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10479,30 +10842,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = 
COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10526,30 +10894,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; CI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; VI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX10-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; ; GFX11-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10575,6 +10948,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10582,6 +10956,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10589,6 +10964,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10596,6 +10972,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10603,6 +10980,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10628,6 +11006,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: 
test_ext_load_private_s64_from_2_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10635,6 +11014,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10642,6 +11022,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10649,6 +11030,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10656,6 +11038,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10681,6 +11064,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10688,6 +11072,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10695,6 +11080,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10702,6 +11088,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10709,6 +11096,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10737,6 +11125,7 @@ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10747,6 +11136,7 @@ ; CI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; 
CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10757,6 +11147,7 @@ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10767,6 +11158,7 @@ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX10-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10777,6 +11169,7 @@ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; ; GFX11-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10805,6 +11198,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10812,6 +11206,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10819,6 +11214,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10826,6 +11222,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10833,6 +11230,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10858,6 +11256,7 @@ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -10865,6 +11264,7 @@ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -10872,6 +11272,7 @@ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -10879,6 +11280,7 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -10886,6 +11288,7 @@ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10910,30 +11313,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -10957,30 +11365,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11004,30 +11417,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; CI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; VI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11051,30 +11469,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; CI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; VI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY 
$vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11098,30 +11521,35 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; CI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; VI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX10-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; ; GFX11-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11240,6 +11668,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11341,6 +11770,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11442,6 +11872,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11543,6 +11974,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align1 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11644,6 +12076,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11724,6 +12157,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; 
SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -11776,6 +12210,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -11828,6 +12263,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -11880,6 +12316,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -11932,6 +12369,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -11986,6 +12424,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12012,6 +12451,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12038,6 +12478,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12064,6 +12505,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align4 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12090,6 +12532,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -12144,6 +12587,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} @@ -12170,6 +12614,7 @@ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -12196,6 +12641,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX9-LABEL: name: 
test_load_private_v2s96_align16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -12222,6 +12668,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX10-LABEL: name: test_load_private_v2s96_align16 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -12248,6 +12695,7 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; ; GFX11-LABEL: name: test_load_private_v2s96_align16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX9-LABEL: name: test_lshr_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -85,6 +89,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -119,6 +125,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; VI-LABEL: name: test_lshr_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 +135,7 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64) + ; ; GFX9-LABEL: name: test_lshr_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -159,6 +167,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -169,6 +178,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -203,6 +213,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -213,6 +224,7 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -249,26 +261,28 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) @@ -298,26 +312,28 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; VI-LABEL: name: test_lshr_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_lshr_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -350,6 +366,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_lshr_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -361,6 +378,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_lshr_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -396,6 +414,7 @@ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_lshr_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -408,6 +427,7 @@ ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32), [[LSHR2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_lshr_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -443,6 +463,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_lshr_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -454,6 +475,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_lshr_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -491,6 +513,7 @@ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -505,6 +528,7 @@ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -548,15 +572,14 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], 
[[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_lshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -579,6 +602,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -610,14 +634,14 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[UV]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[UV1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[UV1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_lshr_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -639,6 +663,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -686,28 +711,26 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -741,13 +764,12 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -810,34 +832,32 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR 
[[AND3]], [[AND2]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -877,13 +897,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -953,27 +973,24 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST2]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[LSHR2]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[LSHR3]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1014,6 +1031,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1042,68 +1060,70 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr4 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 @@ -1142,6 +1162,7 @@ ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1165,6 +1186,7 @@ ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1205,11 +1227,13 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1241,6 +1265,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1254,6 +1279,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: 
[[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_23 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1292,6 +1318,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1305,6 +1332,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1342,6 +1370,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1354,6 +1383,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1391,6 +1421,7 @@ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1404,6 +1435,7 @@ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[LSHR1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1439,6 +1471,7 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_lshr_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1449,6 +1482,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LSHR]](s64), [[C1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_lshr_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1476,33 +1510,33 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: 
[[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] + ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1510,27 +1544,27 @@ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), 
[[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) @@ -1538,50 +1572,51 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] + ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] - ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] + ; SI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
= COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_lshr_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] + ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32) ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1589,27 +1624,27 @@ ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32) @@ -1617,50 +1652,51 @@ ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] + ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), 
[[UV10]], [[SELECT12]]
  ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]]
  ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64)
- ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]]
- ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]]
+ ; VI-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]]
+ ; VI-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]]
  ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64)
  ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256)
+ ;
  ; GFX9-LABEL: name: test_lshr_s256_s256
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
- ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
- ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
  ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[COPY1]](s32)
  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32)
  ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32)
  ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32)
  ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]]
  ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]]
- ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]]
+ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]]
  ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
  ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32)
  ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32)
  ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32)
@@ -1668,27 +1704,27 @@
  ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32)
  ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]]
  ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]]
- ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]]
+ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]]
  ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]]
- ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]]
- ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]]
+ ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]]
+ ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
  ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32)
  ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32)
  ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32)
  ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]]
  ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32)
- ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]]
+ ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]]
  ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]]
  ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]]
  ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]]
  ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]]
  ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]]
- ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]]
- ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]]
+ ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]]
+ ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]]
  ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32)
  ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32)
  ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB9]](s32)
@@ -1696,15 +1732,15 @@
  ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32)
  ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]]
  ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]]
- ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]]
+ ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]]
  ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]]
  ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]]
  ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
  ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]]
  ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]]
  ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64)
- ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]]
- ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]]
+ ; GFX9-NEXT: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]]
+ ; GFX9-NEXT: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]]
  ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64)
  ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128)
  ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256)
@@ -1761,6 +1797,7 @@
  ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64)
  ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
  ; VI-LABEL: name: test_lshr_v2s128_v2s32
  ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
  ; VI-NEXT: {{ $}}
@@ -1801,6 +1838,7 @@
  ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64)
  ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
  ; GFX9-LABEL: name: test_lshr_v2s128_v2s32
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
  ; GFX9-NEXT: {{ $}}
@@ -1859,12 +1897,12 @@
  ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -1885,18 +1923,19 @@
  ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
  ; VI-LABEL: name: test_lshr_s65_s32
  ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -1917,18 +1956,19 @@
  ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
  ; GFX9-LABEL: name: test_lshr_s65_s32
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -1968,12 +2008,12 @@
  ; SI-NEXT: {{ $}}
  ; SI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -1994,17 +2034,18 @@
  ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
  ; VI-LABEL: name: test_lshr_s65_s32_constant8
  ; VI: liveins: $vgpr0_vgpr1_vgpr2
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -2025,17 +2066,18 @@
  ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
  ; GFX9-LABEL: name: test_lshr_s65_s32_constant8
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: {{ $}}
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %23(s64)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]]
  ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]]
  ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -2077,12 +2119,12 @@
  ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; SI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; SI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
  ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]]
  ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -2103,6 +2145,7 @@
  ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
  ; VI-LABEL: name: test_lshr_s65_s32_known_pow2
  ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
  ; VI-NEXT: {{ $}}
@@ -2110,12 +2153,12 @@
  ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; VI-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; VI-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
  ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]]
  ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
@@ -2136,6 +2179,7 @@
  ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
  ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
  ; GFX9-LABEL: name: test_lshr_s65_s32_known_pow2
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
  ; GFX9-NEXT: {{ $}}
@@ -2143,12 +2187,12 @@
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
  ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
  ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
  ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C2]]
  ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
@@ -23,8 +23,8 @@
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
  ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
@@ -91,14 +91,11 @@
  bb.0:
  ; CHECK-LABEL: name: test_merge_s16_s8_s8
  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C3]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]]
  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
  %0:_(s8) = G_CONSTANT i8 0
@@ -115,26 +112,22 @@
  ; CHECK-LABEL: name: test_merge_s24_s8_s8_s8
  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C2]], [[C1]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C1]], [[TRUNC]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C4]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC1]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
  ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32)
  %0:_(s8) = G_CONSTANT i8 0
@@ -154,18 +147,14 @@
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C5]](s32)
  ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C6]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
  ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32)
  %0:_(s8) = G_CONSTANT i8 0
@@ -242,47 +231,40 @@
  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C5]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
  ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C7]](s16)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C6]](s16)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C9]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C7]](s32)
  ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C10]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
  ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32)
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
  ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
  ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32)
  ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s24)
@@ -308,47 +290,39 @@
  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C6]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
  ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC1]]
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC2]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[C8]](s16)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C7]](s16)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[COPY4]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC3]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C10]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[COPY3]], [[TRUNC3]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C8]](s32)
  ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC4]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C9]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C11]](s32)
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C10]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C9]](s32)
  ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC5]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C12]](s32)
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32)
  ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
  ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32)
  ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC6]](s28)
@@ -376,33 +350,25 @@
  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C4]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
  ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C8]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C8]](s32)
  ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
  ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C9]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C9]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
  ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C10]](s32)
  ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
  ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C11]](s32)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C11]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
  ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C12]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C12]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
  ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C13]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C13]](s32)
  ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
  ; CHECK-NEXT: S_NOP 0, implicit [[OR6]](s32)
  %0:_(s4) = G_CONSTANT i4 0
@@ -426,43 +392,34 @@
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C4]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C4]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[TRUNC]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C4]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC1]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C4]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC2]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C5]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C8]], [[TRUNC3]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
  ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
  ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
@@ -490,62 +447,49 @@
  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C7]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C10]], [[C7]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C11]], [[C7]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C9]], [[TRUNC1]]
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C12]], [[C7]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[COPY6]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C10]], [[TRUNC2]]
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[COPY3]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[C9]](s16)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[COPY9]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C11]], [[TRUNC3]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C8]](s16)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[COPY5]](s32)
  ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC4]]
- ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 10
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C13]], [[C7]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[COPY11]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[COPY4]], [[TRUNC4]]
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 10
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[COPY6]](s32)
  ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[TRUNC5]]
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[C12]], [[TRUNC5]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32)
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32)
  ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
  ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32)
  ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
  ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
  ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32)
  ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32)
  ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
@@ -605,44 +549,36 @@
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C4]], [[C3]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY]](s32)
  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[C7]], [[C3]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[TRUNC]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[COPY1]](s32)
  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND1]], [[TRUNC1]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C8]], [[C3]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C5]], [[TRUNC1]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[COPY2]](s32)
  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC2]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C9]], [[C3]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C6]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C5]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C6]], [[TRUNC2]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C8]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32)
  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC3]]
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C7]], [[TRUNC3]]
  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32)
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
  ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
  ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
@@ -667,315 +603,249 @@
  ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
  ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
  ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
  ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
  ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
  ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
  ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C8]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
  ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
- ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C9]](s32)
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
  ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
  ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
- ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C11]](s32)
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
  ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C12]](s32)
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
  ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
- ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C13]](s32)
+ ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
  ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
- ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
  ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
- ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C15]](s32)
+ ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
  ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C16]](s32)
+ ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
  ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
  ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
- ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C17]](s32)
+ ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C17]](s32)
  ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
- ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C18]](s32)
+ ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
  ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
- ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C19]](s32)
+ ; CHECK-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
  ; CHECK-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C20]](s32)
+ ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
  ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
- ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C21]](s32)
+ ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
  ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
- ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
- ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C22]](s32)
+ ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
  ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
- ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
- ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C23]](s32)
+ ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
  ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
- ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C24]](s32)
+ ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
  ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
- ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
- ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C25]](s32)
+ ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
  ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
- ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
- ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C26]](s32)
+ ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
  ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
- ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
- ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C27]](s32)
+ ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
  ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
- ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
- ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C28]](s32)
+ ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
  ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
- ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
- ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C29]](s32)
+ ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
  ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
- ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
- ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C30]](s32)
+ ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
  ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
- ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32)
  ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[C31]](s32)
+ ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
  ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
- ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32)
- ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY32]], [[SHL31]]
- ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY34]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL31]]
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32)
  ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]]
- ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[COPY35]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C3]](s32)
  ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]]
- ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[C4]](s32)
+ ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
  ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]]
- ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[COPY37]], [[C5]](s32)
+ ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
  ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]]
- ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[COPY38]], [[C6]](s32)
+ ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
  ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]]
- ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C7]](s32)
+ ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
  ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]]
- ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[COPY40]], [[C8]](s32)
+ ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
  ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]]
- ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[COPY41]], [[C9]](s32)
+ ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
  ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]]
- ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[COPY42]], [[C10]](s32)
+ ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
  ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]]
- ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[COPY43]], [[C11]](s32)
+ ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
  ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]]
- ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[COPY44]], [[C12]](s32)
+ ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
  ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]]
- ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[COPY45]], [[C13]](s32)
+ ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
  ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]]
- ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[COPY46]], [[C14]](s32)
+ ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
  ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]]
- ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[COPY47]], [[C15]](s32)
+ ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
  ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]]
- ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[COPY48]], [[C16]](s32)
+ ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
  ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]]
- ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[COPY49]], [[C17]](s32)
+ ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
  ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]]
- ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[COPY50]], [[C18]](s32)
+ ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
  ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]]
- ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY51]], [[C19]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C19]](s32)
  ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]]
- ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[COPY52]], [[C20]](s32)
+ ; CHECK-NEXT: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C20]](s32)
  ; CHECK-NEXT: [[OR50:%[0-9]+]]:_(s32) = G_OR [[OR49]], [[SHL50]]
- ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[COPY53]], [[C21]](s32)
+ ; CHECK-NEXT: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
  ; CHECK-NEXT: [[OR51:%[0-9]+]]:_(s32) = G_OR [[OR50]], [[SHL51]]
- ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[COPY54]], [[C22]](s32)
+ ; CHECK-NEXT: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
  ; CHECK-NEXT: [[OR52:%[0-9]+]]:_(s32) = G_OR [[OR51]], [[SHL52]]
- ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[COPY55]], [[C23]](s32)
+ ; CHECK-NEXT: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
  ; CHECK-NEXT: [[OR53:%[0-9]+]]:_(s32) = G_OR [[OR52]], [[SHL53]]
- ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[COPY56]], [[C24]](s32)
+ ; CHECK-NEXT: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
  ; CHECK-NEXT: [[OR54:%[0-9]+]]:_(s32) = G_OR [[OR53]], [[SHL54]]
- ; CHECK-NEXT: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[COPY57]], [[C25]](s32)
+ ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
  ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]]
- ; CHECK-NEXT: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY58]], [[C26]](s32)
+ ; CHECK-NEXT: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
  ; CHECK-NEXT: [[OR56:%[0-9]+]]:_(s32) = G_OR [[OR55]], [[SHL56]]
- ; CHECK-NEXT: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY59]], [[C27]](s32)
+ ; CHECK-NEXT: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
  ; CHECK-NEXT: [[OR57:%[0-9]+]]:_(s32) = G_OR [[OR56]], [[SHL57]]
- ; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY60]], [[C28]](s32)
+ ; CHECK-NEXT: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
  ; CHECK-NEXT: [[OR58:%[0-9]+]]:_(s32) = G_OR [[OR57]], [[SHL58]]
- ; CHECK-NEXT: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY61]], [[C29]](s32)
+ ; CHECK-NEXT: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
  ; CHECK-NEXT: [[OR59:%[0-9]+]]:_(s32) = G_OR [[OR58]], [[SHL59]]
- ; CHECK-NEXT: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY62]], [[C30]](s32)
+ ; CHECK-NEXT: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
  ; CHECK-NEXT: [[OR60:%[0-9]+]]:_(s32) = G_OR [[OR59]], [[SHL60]]
- ; CHECK-NEXT: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY63]], [[C31]](s32)
+ ; CHECK-NEXT: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
  ; CHECK-NEXT: [[OR61:%[0-9]+]]:_(s32) = G_OR [[OR60]], [[SHL61]]
- ; CHECK-NEXT: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C1]](s32)
- ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL62]]
- ; CHECK-NEXT: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY66]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR62:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL62]]
+ ; CHECK-NEXT: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
  ; CHECK-NEXT: [[OR63:%[0-9]+]]:_(s32) = G_OR [[OR62]], [[SHL63]]
- ; CHECK-NEXT: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C3]](s32)
+ ; CHECK-NEXT: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
  ; CHECK-NEXT: [[OR64:%[0-9]+]]:_(s32) = G_OR [[OR63]], [[SHL64]]
- ; CHECK-NEXT: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY68]], [[C4]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C4]](s32)
  ; CHECK-NEXT: [[OR65:%[0-9]+]]:_(s32) = G_OR [[OR64]], [[SHL65]]
- ; CHECK-NEXT: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY69]], [[C5]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C5]](s32)
  ; CHECK-NEXT: [[OR66:%[0-9]+]]:_(s32) = G_OR [[OR65]], [[SHL66]]
- ; CHECK-NEXT: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY70]], [[C6]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C6]](s32)
  ; CHECK-NEXT: [[OR67:%[0-9]+]]:_(s32) = G_OR [[OR66]], [[SHL67]]
- ; CHECK-NEXT: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY71]], [[C7]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C7]](s32)
  ; CHECK-NEXT: [[OR68:%[0-9]+]]:_(s32) = G_OR [[OR67]], [[SHL68]]
- ; CHECK-NEXT: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY72]], [[C8]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C8]](s32)
  ; CHECK-NEXT: [[OR69:%[0-9]+]]:_(s32) = G_OR [[OR68]], [[SHL69]]
- ; CHECK-NEXT: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY73]], [[C9]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C9]](s32)
  ; CHECK-NEXT: [[OR70:%[0-9]+]]:_(s32) = G_OR [[OR69]], [[SHL70]]
- ; CHECK-NEXT: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY74]], [[C10]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C10]](s32)
  ; CHECK-NEXT: [[OR71:%[0-9]+]]:_(s32) = G_OR [[OR70]], [[SHL71]]
- ; CHECK-NEXT: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY75]], [[C11]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C11]](s32)
  ; CHECK-NEXT: [[OR72:%[0-9]+]]:_(s32) = G_OR [[OR71]], [[SHL72]]
- ; CHECK-NEXT: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY76]], [[C12]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C12]](s32)
  ; CHECK-NEXT: [[OR73:%[0-9]+]]:_(s32) = G_OR [[OR72]], [[SHL73]]
- ; CHECK-NEXT: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY77]], [[C13]](s32)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C13]](s32)
  ; CHECK-NEXT: [[OR74:%[0-9]+]]:_(s32) = G_OR [[OR73]], [[SHL74]]
- ; CHECK-NEXT: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY78]], [[C14]](s32)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C14]](s32)
  ; CHECK-NEXT: [[OR75:%[0-9]+]]:_(s32) = G_OR [[OR74]], [[SHL75]]
- ; CHECK-NEXT: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY79]], [[C15]](s32)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C15]](s32)
  ; CHECK-NEXT: [[OR76:%[0-9]+]]:_(s32) = G_OR [[OR75]], [[SHL76]]
- ; CHECK-NEXT: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY80]], [[C16]](s32)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C16]](s32)
  ; CHECK-NEXT: [[OR77:%[0-9]+]]:_(s32) = G_OR [[OR76]], [[SHL77]]
- ; CHECK-NEXT: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY81]], [[C17]](s32)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C17]](s32)
  ; CHECK-NEXT: [[OR78:%[0-9]+]]:_(s32) = G_OR [[OR77]], [[SHL78]]
- ; CHECK-NEXT: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY82]], [[C18]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C18]](s32)
  ; CHECK-NEXT: [[OR79:%[0-9]+]]:_(s32) = G_OR [[OR78]], [[SHL79]]
- ; CHECK-NEXT: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT:
[[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY83]], [[C19]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C19]](s32) ; CHECK-NEXT: [[OR80:%[0-9]+]]:_(s32) = G_OR [[OR79]], [[SHL80]] - ; CHECK-NEXT: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY84]], [[C20]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C20]](s32) ; CHECK-NEXT: [[OR81:%[0-9]+]]:_(s32) = G_OR [[OR80]], [[SHL81]] - ; CHECK-NEXT: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY85]], [[C21]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C21]](s32) ; CHECK-NEXT: [[OR82:%[0-9]+]]:_(s32) = G_OR [[OR81]], [[SHL82]] - ; CHECK-NEXT: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY86]], [[C22]](s32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C22]](s32) ; CHECK-NEXT: [[OR83:%[0-9]+]]:_(s32) = G_OR [[OR82]], [[SHL83]] - ; CHECK-NEXT: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY87]], [[C23]](s32) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY21]], [[C23]](s32) ; CHECK-NEXT: [[OR84:%[0-9]+]]:_(s32) = G_OR [[OR83]], [[SHL84]] - ; CHECK-NEXT: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY88]], [[C24]](s32) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C24]](s32) ; CHECK-NEXT: [[OR85:%[0-9]+]]:_(s32) = G_OR [[OR84]], [[SHL85]] - ; CHECK-NEXT: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY89]], [[C25]](s32) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C25]](s32) ; CHECK-NEXT: [[OR86:%[0-9]+]]:_(s32) = G_OR [[OR85]], [[SHL86]] - ; CHECK-NEXT: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY90]], [[C26]](s32) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY24]], [[C26]](s32) ; CHECK-NEXT: [[OR87:%[0-9]+]]:_(s32) = G_OR [[OR86]], [[SHL87]] - ; CHECK-NEXT: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY91]], [[C27]](s32) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY25]], [[C27]](s32) ; CHECK-NEXT: [[OR88:%[0-9]+]]:_(s32) = G_OR [[OR87]], [[SHL88]] - ; CHECK-NEXT: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY92]], [[C28]](s32) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY26]], [[C28]](s32) ; CHECK-NEXT: [[OR89:%[0-9]+]]:_(s32) = G_OR [[OR88]], [[SHL89]] - ; CHECK-NEXT: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY93]], [[C29]](s32) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY27]], [[C29]](s32) ; CHECK-NEXT: 
[[OR90:%[0-9]+]]:_(s32) = G_OR [[OR89]], [[SHL90]] - ; CHECK-NEXT: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY94]], [[C30]](s32) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY28]], [[C30]](s32) ; CHECK-NEXT: [[OR91:%[0-9]+]]:_(s32) = G_OR [[OR90]], [[SHL91]] - ; CHECK-NEXT: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY95]], [[C31]](s32) + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[C31]](s32) ; CHECK-NEXT: [[OR92:%[0-9]+]]:_(s32) = G_OR [[OR91]], [[SHL92]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -472,10 +472,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -534,10 +533,9 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -546,10 +544,9 @@ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = 
G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) @@ -564,20 +561,17 @@ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -179,10 +179,9 @@ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL4]] ; CHECK-NEXT: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -1373,8 +1372,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 @@ -1430,8 +1429,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 @@ -1654,9 +1653,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LSHR]](s32), [[LSHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 @@ -1670,12 +1667,10 @@ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND2]](s32), [[AND3]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LSHR2]](s32), [[LSHR3]] ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -1684,9 +1679,9 @@ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[ANYEXT1]], [[C5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir @@ -12,8 +12,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1) @@ -94,8 +94,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s64) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir @@ -21,36 +21,33 @@ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX6-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX6-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], 
[[SUB1]] - ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; GFX6-NEXT: $sgpr0 = COPY [[OR]](s32) ; @@ -62,41 +59,38 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[C]] ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C]] ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: 
[[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND1]](s16) ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[COPY3]] + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[COPY2]] ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C6]], [[C4]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[AND2]](s16) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[SUB4]], [[C4]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[AND4]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[C6]](s16) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[SUB4]], [[C4]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND3]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] @@ -214,36 +208,33 @@ ; GFX-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; GFX-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 ; GFX-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[C1]](s32) ; GFX-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) ; GFX-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] ; GFX-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) ; GFX-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[C1]] ; GFX-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] ; GFX-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] ; GFX-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] ; GFX-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[C1]] ; GFX-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[C1]] + ; GFX-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] ; GFX-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] - ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[C1]] + ; GFX-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C1]] ; GFX-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; 
GFX-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] ; GFX-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) ; GFX-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) ; GFX-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND3]](s32) ; GFX-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; GFX-NEXT: $sgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -256,17 +256,16 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -31,6 +31,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: saddsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -54,6 +55,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[ASHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: saddsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -103,6 +105,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: saddsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -126,6 +129,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: saddsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -187,8 +191,8 @@ ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) - ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -198,6 +202,7 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: saddsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -241,6 +246,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: saddsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -311,6 +317,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) + ; ; GFX8-LABEL: name: saddsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -330,6 +337,7 @@ ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: saddsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -395,6 +403,7 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: saddsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -432,6 +441,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: saddsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -512,13 +522,13 @@ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -577,13 +587,13 @@ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -709,6 +719,7 @@ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: saddsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -776,6 +787,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: saddsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -815,6 +827,7 @@ ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: saddsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -831,6 +844,7 @@ ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: saddsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -876,6 +890,7 @@ ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: saddsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -902,6 +917,7 @@ ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: saddsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; 
GFX9-NEXT: {{ $}} @@ -949,6 +965,7 @@ ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: saddsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -973,6 +990,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: saddsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1051,6 +1069,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: saddsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1093,6 +1112,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: saddsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -417,10 +417,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -1362,10 +1361,9 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) @@ -1401,8 +1399,8 @@ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -17,12 +17,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX8-LABEL: name: test_sext_inreg_s32_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX6-LABEL: name: test_sext_inreg_s32_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -46,12 +48,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2 ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX8-LABEL: name: test_sext_inreg_s32_2 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 2 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX6-LABEL: name: test_sext_inreg_s32_2 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -75,12 +79,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX8-LABEL: name: test_sext_inreg_s32_8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX6-LABEL: name: test_sext_inreg_s32_8 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -104,12 +110,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX8-LABEL: name: test_sext_inreg_s32_16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX6-LABEL: name: test_sext_inreg_s32_16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -133,12 +141,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31 ; GFX9-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32) + ; ; GFX8-LABEL: name: test_sext_inreg_s32_31 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 31 ; GFX8-NEXT: $vgpr0 
= COPY [[SEXT_INREG]](s32) + ; ; GFX6-LABEL: name: test_sext_inreg_s32_31 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -162,12 +172,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 1 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -191,12 +203,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 2 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_2 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -220,12 +234,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -249,12 +265,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -278,12 +296,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_31 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 31 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_31 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -307,12 +327,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_32 ; GFX6: liveins: $vgpr0_vgpr1 ; 
GFX6-NEXT: {{ $}} @@ -336,12 +358,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_33 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 33 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_33 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -365,12 +389,14 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX8-LABEL: name: test_sext_inreg_s64_63 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 63 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + ; ; GFX6-LABEL: name: test_sext_inreg_s64_63 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -395,6 +421,7 @@ ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) + ; ; GFX8-LABEL: name: test_sext_inreg_s16_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -404,6 +431,7 @@ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; ; GFX6-LABEL: name: test_sext_inreg_s16_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -431,6 +459,7 @@ ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 15 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) ; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16) + ; ; GFX8-LABEL: name: test_sext_inreg_s16_15 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -440,6 +469,7 @@ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) ; GFX8-NEXT: S_ENDPGM 0, implicit [[ASHR]](s16) + ; ; GFX6-LABEL: name: test_sext_inreg_s16_15 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -473,6 +503,7 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX8-LABEL: name: test_sext_inreg_s96_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -486,6 +517,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96) + ; ; GFX6-LABEL: name: test_sext_inreg_s96_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} @@ -520,6 +552,7 @@ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX8-LABEL: name: test_sext_inreg_s128_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -530,6 +563,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG]](s64), 
[[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX6-LABEL: name: test_sext_inreg_s128_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -564,6 +598,7 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; ; GFX8-LABEL: name: test_sext_inreg_s160_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX8-NEXT: {{ $}} @@ -577,6 +612,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160) + ; ; GFX6-LABEL: name: test_sext_inreg_s160_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GFX6-NEXT: {{ $}} @@ -611,6 +647,7 @@ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; ; GFX8-LABEL: name: test_sext_inreg_256_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -621,6 +658,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](s256) + ; ; GFX6-LABEL: name: test_sext_inreg_256_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} @@ -652,6 +690,7 @@ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; ; GFX8-LABEL: name: test_sext_inreg_512_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: {{ $}} @@ -662,6 +701,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; ; GFX6-LABEL: name: test_sext_inreg_512_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX6-NEXT: {{ $}} @@ -693,6 +733,7 @@ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; ; GFX8-LABEL: name: test_sext_inreg_1024_8 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX8-NEXT: {{ $}} @@ -703,6 +744,7 @@ ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[MV]](s1024) + ; ; GFX6-LABEL: name: test_sext_inreg_1024_8 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX6-NEXT: {{ $}} @@ -733,6 +775,7 @@ ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s32_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -742,6 +785,7 @@ ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s32_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -771,6 +815,7 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: $vgpr0 = COPY [[ASHR]](<2 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s16_1 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -791,6 +836,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s16_1 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} @@ -854,6 +900,7 @@ ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v3s16_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -889,13 +936,13 @@ ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR1]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL5]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v3s16_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} @@ -924,10 +971,9 @@ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) @@ -955,6 +1001,7 @@ ; GFX9-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v3s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -965,6 +1012,7 @@ ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v3s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-NEXT: {{ $}} @@ -997,6 +1045,7 @@ ; GFX9-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX8-LABEL: name: test_sext_inreg_v4s32_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -1008,6 +1057,7 @@ ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 1 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX6-LABEL: name: test_sext_inreg_v4s32_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: {{ $}} @@ -1044,6 +1094,7 @@ ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) ; 
GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v4s16_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -1079,6 +1130,7 @@ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v4s16_1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6-NEXT: {{ $}} @@ -1131,6 +1183,7 @@ ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) ; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) @@ -1177,6 +1230,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) @@ -1241,6 +1295,7 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX8-LABEL: name: test_sext_inreg_v2s128_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1258,6 +1313,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SEXT_INREG1]](s64), [[ASHR1]](s64) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>) + ; ; GFX6-LABEL: name: test_sext_inreg_v2s128_1 ; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -659,8 +659,8 @@ ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C3]](s16) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C4]](s16) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C5]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; GFX9-LABEL: name: test_shl_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -59,6 +62,7 @@ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -85,6 +89,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -92,6 +97,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -119,6 +125,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; VI-LABEL: name: test_shl_s64_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -128,6 +135,7 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: test_shl_s64_s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -157,6 +165,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -167,6 +176,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -200,6 +210,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -210,6 +221,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -244,26 +256,28 @@ ; SI-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s16_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s16_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) @@ -292,25 +306,27 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_i8_i8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_i8_i8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) @@ -343,6 +359,7 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_shl_v2s32_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -354,6 +371,7 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_shl_v2s32_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -389,6 +407,7 @@ ; SI-NEXT: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_shl_v3s32_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -401,6 +420,7 @@ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV5]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32), [[SHL2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_shl_v3s32_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -436,6 +456,7 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_shl_v2s64_v2s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -447,6 +468,7 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_shl_v2s64_v2s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -484,6 +506,7 @@ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -498,6 +521,7 @@ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -540,14 +564,14 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) 
+ ; ; VI-LABEL: name: test_shl_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -570,6 +594,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_shl_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -607,6 +632,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_shl_v2s16_v2s32 ; VI: liveins: $vgpr0, $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -628,6 +654,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_shl_v2s16_v2s32 ; GFX9: liveins: $vgpr0, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -673,32 +700,31 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = 
G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -738,13 +764,13 @@ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -814,24 +840,23 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[AND]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[AND1]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[AND3]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR2]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[LSHR3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C1]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] ; SI-NEXT: 
[[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -872,6 +897,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -904,25 +930,27 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_s7_s7 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_shl_s7_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND]](s16) @@ -950,6 +978,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; SI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; VI-LABEL: name: test_shl_i24_i32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -957,6 +986,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) ; VI-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; ; GFX9-LABEL: name: test_shl_i24_i32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -983,66 +1013,68 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s128 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s128 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; 
GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[COPY1]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[COPY1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) @@ -1083,6 +1115,7 @@ ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s132 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1106,6 +1139,7 @@ ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s132 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1146,11 +1180,13 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_0 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_0 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1182,6 +1218,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_23 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1195,6 +1232,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_23 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1233,6 +1271,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_31 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1246,6 +1285,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_31 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1283,6 +1323,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_32 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1295,6 +1336,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1332,6 +1374,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_33 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1345,6 +1388,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SHL]](s64), [[OR]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_33 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1380,6 +1424,7 @@ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; VI-LABEL: name: test_shl_s128_s32_127 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; VI-NEXT: {{ $}} @@ -1390,6 +1435,7 @@ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[C1]](s32) ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[SHL]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; ; GFX9-LABEL: name: test_shl_s128_s32_127 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX9-NEXT: {{ $}} @@ -1417,33 +1463,33 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[COPY1]](s32), [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1451,37 +1497,37 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] + ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] + ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; SI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] - ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] + ; SI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; SI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; SI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] @@ -1491,38 +1537,39 @@ ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; VI-LABEL: name: test_shl_s256_s256 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] - ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] - ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32) ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] - ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] - ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] - ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] + ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] + ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) @@ -1530,37 +1577,37 @@ ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] + ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]] - ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB 
[[C3]], [[COPY1]] - ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]] - ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]] + ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]] + ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]] + ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] + ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] - ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] - ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] - ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] + ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] + ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] + ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]] ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] + ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; VI-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] - ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; VI-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] + ; VI-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; VI-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; VI-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] @@ -1570,38 +1617,39 @@ ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256) + ; ; GFX9-LABEL: name: test_shl_s256_s256 ; GFX9: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
- ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
- ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[COPY1]](s32)
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32)
 ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[COPY1]](s32)
 ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32)
- ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]]
+ ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]]
 ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]]
 ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]]
 ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]]
- ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]]
- ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]]
+ ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]]
+ ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32)
 ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32)
 ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32)
@@ -1609,37 +1657,37 @@
 ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32)
 ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]]
 ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]]
- ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]]
+ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]]
 ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[COPY1]](s32)
 ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32)
 ; GFX9-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[COPY1]](s32)
 ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]]
 ; GFX9-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32)
- ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]]
+ ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]]
 ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]]
 ; GFX9-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]]
 ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]]
 ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]]
 ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]]
- ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]]
- ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]]
- ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]]
+ ; GFX9-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]]
+ ; GFX9-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]]
+ ; GFX9-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]]
+ ; GFX9-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]]
 ; GFX9-NEXT: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32)
 ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32)
 ; GFX9-NEXT: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32)
 ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]]
 ; GFX9-NEXT: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32)
- ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]]
+ ; GFX9-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]]
 ; GFX9-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]]
 ; GFX9-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]]
- ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]]
- ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]]
+ ; GFX9-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]]
+ ; GFX9-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]]
 ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64)
 ; GFX9-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]]
 ; GFX9-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]]
@@ -1702,6 +1750,7 @@
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
 ; VI-LABEL: name: test_shl_v2s128_v2s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -1742,6 +1791,7 @@
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT3]](s64), [[SELECT5]](s64)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
 ; GFX9-LABEL: name: test_shl_v2s128_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
 ; GFX9-NEXT: {{ $}}
@@ -1822,6 +1872,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; VI-LABEL: name: test_shl_s65_s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; VI-NEXT: {{ $}}
@@ -1850,6 +1901,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; GFX9-LABEL: name: test_shl_s65_s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -1919,6 +1971,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; VI-LABEL: name: test_shl_s65_s32_constant8
 ; VI: liveins: $vgpr0_vgpr1_vgpr2
 ; VI-NEXT: {{ $}}
@@ -1946,6 +1999,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; GFX9-LABEL: name: test_shl_s65_s32_constant8
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -2016,6 +2070,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
 ; VI-LABEL: name: test_shl_s65_s32_known_pow2
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; VI-NEXT: {{ $}}
@@ -2045,6 +2100,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
 ; GFX9-LABEL: name: test_shl_s65_s32_known_pow2
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
@@ -244,8 +244,7 @@
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
@@ -276,10 +275,9 @@
 ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
 %0:_(<2 x s16>) = COPY $vgpr0
@@ -311,14 +309,13 @@
 ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
 ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
@@ -23,6 +23,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_undef
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -62,6 +63,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_undef
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -105,6 +107,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -145,11 +148,11 @@
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_1
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -192,12 +195,12 @@
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_0
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -246,6 +249,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -285,13 +289,11 @@
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -339,6 +341,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -384,6 +387,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_undef
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -427,6 +431,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -467,11 +472,11 @@
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_3
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -514,12 +519,12 @@
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_2
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -559,12 +564,11 @@
 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_3
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -600,13 +604,12 @@
 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_undef
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -644,13 +647,11 @@
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -698,6 +699,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_2
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -745,6 +747,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -786,12 +789,12 @@
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_0
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -835,11 +838,11 @@
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_3
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -882,12 +885,12 @@
 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_2
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -931,11 +934,11 @@
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_1
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -980,17 +983,17 @@
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
 ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -998,7 +1001,7 @@
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
@@ -1006,9 +1009,9 @@
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
 %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
@@ -1044,12 +1047,12 @@
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_0
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -1098,13 +1101,11 @@
 ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>)
 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_3
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulh.mir
@@ -15,6 +15,7 @@
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH [[COPY]], [[COPY1]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SMULH]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulh_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -45,6 +46,7 @@
 ; GFX8-NEXT: [[SMULH1:%[0-9]+]]:_(s32) = G_SMULH [[UV1]], [[UV3]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMULH]](s32), [[SMULH1]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_smulh_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -80,6 +82,7 @@
 ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[MUL]], [[C]](s32)
 ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 16
 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG2]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulh_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -124,6 +127,7 @@
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16)
 ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 8
 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulh_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -181,6 +185,7 @@
 ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG4]](s32), [[SEXT_INREG5]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_smulh_v2s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -246,6 +251,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulh_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -349,8 +355,8 @@
 ; GFX8-NEXT: [[ASHR10:%[0-9]+]]:_(s16) = G_ASHR [[SHL7]], [[C3]](s16)
 ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[ASHR9]], [[ASHR10]]
 ; GFX8-NEXT: [[ASHR11:%[0-9]+]]:_(s16) = G_ASHR [[MUL3]], [[C3]](s16)
- ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
 ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR5]](s16)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]]
@@ -365,6 +371,7 @@
 ; GFX8-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
 ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL10]]
 ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulh_v4s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smulo.mir
@@ -21,6 +21,7 @@
 ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -72,6 +73,7 @@
 ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_smulo_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -124,6 +126,7 @@
 ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -169,6 +172,7 @@
 ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -235,6 +239,7 @@
 ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG8]](s32), [[SEXT_INREG9]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>)
 ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_smulo_v2s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -296,8 +301,8 @@
 ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]]
 ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]]
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
 ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -310,6 +315,7 @@
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32)
 ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -327,8 +333,8 @@
 ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[SEXT_INREG3]], [[SEXT_INREG4]]
 ; GFX9-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[MUL1]], 8
 ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[SEXT_INREG5]]
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32)
 ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -411,6 +417,7 @@
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_v4s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -492,6 +499,7 @@
 ; GFX8-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s1)
 ; GFX8-NEXT: $vgpr0 = COPY [[SEXT_INREG3]](s32)
 ; GFX8-NEXT: $vgpr1 = COPY [[SEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_smulo_s24
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
@@ -31,14 +31,15 @@
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32)
+ ;
 ; GFX8-LABEL: name: sshlsat_s7
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16)
@@ -54,14 +55,15 @@
 ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16)
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: sshlsat_s7
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16)
@@ -112,14 +114,15 @@
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32)
+ ;
 ; GFX8-LABEL: name: sshlsat_s8
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16)
@@ -135,14 +138,15 @@
 ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16)
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: sshlsat_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16)
@@ -204,8 +208,8 @@
 ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]]
 ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32)
- ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32)
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]]
 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]]
@@ -214,6 +218,7 @@
 ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]]
 ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX8-LABEL: name: sshlsat_v2s8
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -223,8 +228,8 @@
 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16)
@@ -255,6 +260,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: sshlsat_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -264,8 +270,8 @@
 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
 ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
 ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16)
@@ -334,6 +340,7 @@
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32)
+ ;
 ; GFX8-LABEL: name: sshlsat_s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -352,6 +359,7 @@
 ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: sshlsat_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -408,21 +416,21 @@
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
- ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32)
+ ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR1]](s32)
 ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
 ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]]
 ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
- ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
+ ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
+ ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]]
 ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX8-LABEL: name: sshlsat_v2s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -458,6 +466,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: sshlsat_v2s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -513,34 +522,32 @@
 ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
- ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
- ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32)
- ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
- ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]]
- ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32)
+ ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[LSHR1]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C3]]
+ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]]
 ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
- ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
- ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
- ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32)
+ ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND]](s32)
+ ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C3]]
+ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]]
 ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]]
 ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
 ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32)
- ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32)
- ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32)
- ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]]
- ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]]
+ ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32)
+ ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[LSHR2]](s32)
+ ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C3]]
+ ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]]
 ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]]
 ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]]
 ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
@@ -549,23 +556,23 @@
 ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
 ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
- ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
+ ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]]
+ ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]]
 ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]]
- ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C4]]
+ ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL7]]
 ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
 ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX8-LABEL: name: sshlsat_v3s16
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX8-NEXT: {{ $}}
@@ -621,13 +628,13 @@
 ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
 ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL5]]
 ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: sshlsat_v3s16
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -723,45 +730,44 @@
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
 ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
- ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32)
+ ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[LSHR2]](s32)
 ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
 ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
 ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]]
 ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
 ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32)
- ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32)
- ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32)
+ ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32)
+ ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND1]](s32)
 ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]]
 ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]]
 ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]]
 ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]]
 ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
 ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32)
- ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32)
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32)
+ ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[LSHR3]](s32)
 ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]]
 ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]]
 ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]]
 ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]]
 ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
+ ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
+ ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
 ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]]
- ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]]
- ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]]
+ ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]]
+ ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
 ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX8-LABEL: name: sshlsat_v4s16
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -825,6 +831,7 @@
 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: sshlsat_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -907,6 +914,7 @@
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]]
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]]
 ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+ ;
 ; GFX8-LABEL: name: sshlsat_s32
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -922,6 +930,7 @@
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]]
 ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32)
+ ;
 ; GFX9-LABEL: name: sshlsat_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -973,6 +982,7 @@
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX8-LABEL: name: sshlsat_v2s32
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -997,6 +1007,7 @@
 ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: sshlsat_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -1049,6 +1060,7 @@
 ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]]
 ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]]
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+ ;
 ; GFX8-LABEL: name: sshlsat_s64
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -1065,6 +1077,7 @@
 ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]]
 ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]]
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+ ;
 ; GFX9-LABEL: name: sshlsat_s64
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -1119,6 +1132,7 @@
 ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX8-LABEL: name: sshlsat_v2s64
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX8-NEXT: {{ $}}
@@ -1145,6 +1159,7 @@
 ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]]
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: sshlsat_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
@@ -256,17 +256,16 @@
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
 ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]]
 ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
@@ -31,6 +31,7 @@
 ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]]
 ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX8-LABEL: name: ssubsat_s7
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -54,6 +55,7 @@
 ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16)
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: ssubsat_s7
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -103,6 +105,7 @@
 ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]]
 ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX8-LABEL: name: ssubsat_s8
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -126,6 +129,7 @@
 ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16)
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: ssubsat_s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -187,8 +191,8 @@
 ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]]
 ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]]
 ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32)
- ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]]
 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
 ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
@@ -198,6 +202,7 @@
 ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
 ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX8-LABEL: name: ssubsat_v2s8
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -241,6 +246,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: ssubsat_v2s8
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -311,6 +317,7 @@
 ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]]
 ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX8-LABEL: name: ssubsat_s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -330,6 +337,7 @@
 ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: ssubsat_s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -395,6 +403,7 @@
 ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]]
 ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX8-LABEL: name: ssubsat_v2s16
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -432,6 +441,7 @@
 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: ssubsat_v2s16
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -512,13 +522,13 @@
 ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
 ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
 ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
+ ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
 ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX8-LABEL: name: ssubsat_v3s16
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX8-NEXT: {{ $}}
@@ -577,13 +587,13 @@
 ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
 ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
 ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9-LABEL: name: ssubsat_v3s16
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -709,6 +719,7 @@
 ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX8-LABEL: name: ssubsat_v4s16
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -776,6 +787,7 @@
 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: ssubsat_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -815,6 +827,7 @@
 ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]]
 ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]]
 ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](s32)
+ ;
 ; GFX8-LABEL: name: ssubsat_s32
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -831,6 +844,7 @@
 ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]]
 ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](s32)
+ ;
 ; GFX9-LABEL: name: ssubsat_s32
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -876,6 +890,7 @@
 ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]]
 ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX8-LABEL: name: ssubsat_v2s32
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -902,6 +917,7 @@
 ; GFX8-NEXT:
[[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -949,6 +965,7 @@ ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: ssubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -973,6 +990,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: ssubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1051,6 +1069,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1093,6 +1112,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -21,6 +21,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s1_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -30,6 +31,7 @@ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; CI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -39,6 +41,7 @@ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; VI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s1_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -68,6 +71,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s7_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -76,6 +80,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: 
test_store_global_s7_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -84,6 +89,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s7_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -110,18 +116,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s8_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s8_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -153,12 +162,14 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -172,6 +183,7 @@ ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -196,18 +208,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -232,18 +247,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -274,6 +292,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -286,6 +305,7 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -298,6 +318,7 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -334,6 +355,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -346,6 +368,7 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -358,6 +381,7 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -401,6 +425,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s24_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -413,6 +438,7 @@ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; CI-NEXT: 
G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s24_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -432,6 +458,7 @@ ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s24_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -464,6 +491,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s25_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} @@ -472,6 +500,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s25_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -480,6 +509,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s25_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -543,17 +573,18 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -578,6 +609,7 @@ ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -607,12 +639,14 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -625,6 +659,7 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -648,18 +683,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -697,17 +735,18 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -733,6 +772,7 @@ ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -763,12 +803,14 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = 
COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -782,6 +824,7 @@ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -805,18 +848,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; CI-LABEL: name: test_store_global_p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; VI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -858,18 +904,18 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s48_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} @@ -883,6 +929,7 @@ ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s48_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -919,6 +966,7 @@ ; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s48_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -963,6 +1011,7 @@ ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s48_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} @@ -976,6 +1025,7 @@ ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s48_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -995,6 +1045,7 @@ ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s48_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1070,8 +1121,7 @@ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1080,23 +1130,24 @@ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 
7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1143,6 +1194,7 @@ ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1184,12 +1236,14 @@ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1214,6 +1268,7 @@ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1237,18 +1292,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1272,18 +1330,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align8 ; VI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1307,18 +1368,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s64_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s64_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s64_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1362,8 +1426,7 @@ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1372,23 +1435,24 @@ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; 
CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1436,6 +1500,7 @@ ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1478,12 +1543,14 @@ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1509,6 +1576,7 @@ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1532,18 +1600,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1567,18 +1638,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 
1) + ; ; GFX9-LABEL: name: test_store_global_p0_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1602,18 +1676,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p0_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p0_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p0_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1657,8 +1734,7 @@ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1667,23 +1743,24 @@ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p999_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p999_align1 ; VI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1731,6 +1808,7 @@ ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p999_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1773,12 +1851,14 @@ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p999_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p999_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -1804,6 +1884,7 @@ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p999_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1827,18 +1908,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p999_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p999_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p999_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1862,18 +1946,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; ; CI-LABEL: name: test_store_global_p999_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p999_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p999_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1897,18 +1984,21 @@ ; SI-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_p999_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_p999_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_p999_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -1946,8 +2036,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -1957,23 +2046,24 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2016,6 +2106,7 @@ ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into 
unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2053,12 +2144,14 @@ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2079,6 +2172,7 @@ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2102,18 +2196,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2137,18 +2234,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2172,18 +2272,21 @@ ; SI-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2222,8 +2325,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -2234,23 +2336,24 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p3_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p3_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2295,6 +2398,7 @@ ; VI-NEXT: G_STORE 
[[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p3_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2334,12 +2438,14 @@ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p3_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p3_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2362,6 +2468,7 @@ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p3_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2385,18 +2492,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p3_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p3_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p3_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2420,18 +2530,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p3_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p3_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p3_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2455,18 +2568,21 
@@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p3_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p3_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p3_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2527,12 +2643,14 @@ ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2576,6 +2694,7 @@ ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2614,12 +2733,14 @@ ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -2641,6 +2762,7 @@ ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2664,18 +2786,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s16_align4 ; CI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2699,18 +2824,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s16_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s16_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2734,18 +2862,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s16_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; CI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s16_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI-NEXT: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s16_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -2783,8 +2914,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -2794,14 +2924,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -2811,23 +2940,24 @@ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v3s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -2887,6 +3017,7 @@ ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v3s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -2931,12 +3062,14 @@ ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; 
SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v3s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -2964,6 +3097,7 @@ ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v3s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -2992,18 +3126,21 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v3s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -3032,18 +3169,21 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v3s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -3072,18 +3212,21 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: liveins: 
$vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v3s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: {{ $}} @@ -3121,8 +3264,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -3132,14 +3274,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -3149,14 +3290,13 @@ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: 
(store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -3166,23 +3306,24 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -3259,6 +3400,7 @@ ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3310,12 +3452,14 @@ ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; ; 
VI-LABEL: name: test_store_global_v4s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -3350,6 +3494,7 @@ ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3373,18 +3518,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3408,18 +3556,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3443,18 +3594,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v4s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY 
$vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v4s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3498,8 +3652,7 @@ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -3508,14 +3661,13 @@ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY6]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -3530,14 +3682,13 @@ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY11]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address 
+ 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -3546,23 +3697,24 @@ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C2]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C3]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY14]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C6]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s64_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -3648,6 +3800,7 @@ ; VI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s64_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3708,12 +3861,14 @@ ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s64_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -3757,6 +3912,7 @@ ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C3]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: 
test_store_global_v2s64_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3780,18 +3936,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s64_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3815,18 +3974,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s64_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3850,18 +4012,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s64_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s64_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -3900,8 +4065,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; 
SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -3911,14 +4075,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -3928,14 +4091,13 @@ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -3945,17 +4107,17 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR 
[[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s16_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -3963,6 +4125,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4040,6 +4203,7 @@ ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s16_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4093,6 +4257,7 @@ ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s16_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4100,6 +4265,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s16_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4135,6 +4301,7 @@ ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s16_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4160,6 +4327,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; 
CI-LABEL: name: test_store_global_v8s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4167,6 +4335,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4174,6 +4343,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4199,6 +4369,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s16_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4206,6 +4377,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s16_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4213,6 +4385,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4238,6 +4411,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s16_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4245,6 +4419,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s16_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4252,6 +4427,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s16_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4291,8 +4467,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -4302,14 +4477,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -4319,14 +4493,13 @@ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -4336,17 +4509,17 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4354,6 +4527,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4431,6 +4605,7 @@ ; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4484,6 +4659,7 @@ ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: {{ $}} @@ -4491,6 +4667,7 @@ ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2p0_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -4526,6 +4703,7 @@ ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -4551,6 +4729,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2p0_align4 ; CI: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4558,6 +4737,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p0_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4565,6 +4745,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p0_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4590,6 +4771,7 @@
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p0_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4597,6 +4779,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p0_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4604,6 +4787,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p0_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4629,6 +4813,7 @@
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v2p0_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -4636,6 +4821,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v2p0_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -4643,6 +4829,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v2p0_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -4682,8 +4869,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -4693,14 +4879,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -4710,17 +4895,17 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s96_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
@@ -4728,6 +4913,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s96_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -4788,6 +4974,7 @@
; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; VI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16)
; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s96_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -4834,6 +5021,7 @@
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s96_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
@@ -4841,6 +5029,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s96_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -4869,6 +5058,7 @@
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s96_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -4899,6 +5089,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s96_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
@@ -4906,6 +5097,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s96_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -4913,6 +5105,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s96_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -4943,6 +5136,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s96_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
@@ -4950,6 +5144,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s96_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -4957,6 +5152,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s96_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -4987,6 +5183,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s96_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; CI-NEXT: {{ $}}
@@ -4994,6 +5191,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; CI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s96_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; VI-NEXT: {{ $}}
@@ -5001,6 +5199,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
; VI-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s96_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
@@ -5040,8 +5239,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -5051,14 +5249,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -5068,14 +5265,13 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -5085,17 +5281,17 @@
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s128_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -5103,6 +5299,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s128_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -5180,6 +5377,7 @@
; VI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16)
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s128_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -5233,6 +5431,7 @@
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s128_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -5240,6 +5439,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s128_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -5275,6 +5475,7 @@
; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1)
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s128_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -5300,6 +5501,7 @@
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s128_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -5307,6 +5509,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s128_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -5314,6 +5517,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s128_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -5339,6 +5543,7 @@
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s128_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -5346,6 +5551,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s128_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -5353,6 +5559,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s128_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -5378,6 +5585,7 @@
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s128_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: {{ $}}
@@ -5385,6 +5593,7 @@
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s128_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -5392,6 +5601,7 @@
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128)
; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s128_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -5430,8 +5640,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -5441,14 +5650,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -5458,14 +5666,13 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -5475,14 +5682,13 @@
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
@@ -5492,17 +5698,17 @@
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32)
; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
- ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32)
+ ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32)
; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5s32_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -5514,6 +5720,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5s32_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -5607,6 +5814,7 @@
; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16)
; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5s32_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -5670,6 +5878,7 @@
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5s32_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -5681,6 +5890,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5s32_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -5722,6 +5932,7 @@
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5s32_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -5755,6 +5966,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5s32_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -5766,6 +5978,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5s32_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -5777,6 +5990,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5s32_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -5810,6 +6024,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5s32_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -5821,6 +6036,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5s32_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -5832,6 +6048,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5s32_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -5865,6 +6082,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5s32_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -5876,6 +6094,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5s32_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -5887,6 +6106,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5s32_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -5929,8 +6149,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -5940,14 +6159,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -5957,14 +6175,13 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -5974,14 +6191,13 @@
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
@@ -5991,17 +6207,17 @@
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32)
; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
- ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32)
+ ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32)
; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5p3_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6014,6 +6230,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5p3_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6108,6 +6325,7 @@
; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16)
; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5p3_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6173,6 +6391,7 @@
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5p3_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6185,6 +6404,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5p3_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6227,6 +6447,7 @@
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5p3_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6262,6 +6483,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5p3_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6274,6 +6496,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5p3_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6286,6 +6509,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5p3_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6321,6 +6545,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5p3_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6333,6 +6558,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5p3_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6345,6 +6571,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5p3_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6380,6 +6607,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v5p3_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6392,6 +6620,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v5p3_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6404,6 +6633,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v5p3_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6439,6 +6669,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v10s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6451,6 +6682,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v10s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6463,6 +6695,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v10s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6508,6 +6741,7 @@
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v11s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6530,6 +6764,7 @@
; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v11s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6552,6 +6787,7 @@
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v11s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6598,6 +6834,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_v12s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6611,6 +6848,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_v12s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6624,6 +6862,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_v12s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6669,8 +6908,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -6680,14 +6918,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -6697,14 +6934,13 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -6714,14 +6950,13 @@
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
@@ -6731,17 +6966,17 @@
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32)
; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
- ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32)
+ ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32)
; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s160_align1
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6754,6 +6989,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s160_align1
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6848,6 +7084,7 @@
; VI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16)
; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s160_align1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -6913,6 +7150,7 @@
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s160_align2
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -6925,6 +7163,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s160_align2
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -6967,6 +7206,7 @@
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s160_align2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -7002,6 +7242,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s160_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -7014,6 +7255,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s160_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -7026,6 +7268,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s160_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -7061,6 +7304,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s160_align8
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -7073,6 +7317,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s160_align8
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -7085,6 +7330,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s160_align8
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -7120,6 +7366,7 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; CI-LABEL: name: test_store_global_s160_align16
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
@@ -7132,6 +7379,7 @@
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; VI-LABEL: name: test_store_global_s160_align16
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
@@ -7144,6 +7392,7 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ;
; GFX9-LABEL: name: test_store_global_s160_align16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
@@ -7188,8 +7437,7 @@
; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32)
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
@@ -7199,14 +7447,13 @@
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32)
; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1)
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
@@ -7216,14 +7463,13 @@
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
- ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32)
; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1)
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1)
; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32)
; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1)
; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1)
@@ -7233,14 +7479,13 @@
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32)
; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64)
; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32)
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32)
; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1)
; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1)
; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32)
; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1)
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
@@ -7251,14 +7496,13 @@
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32)
; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1)
; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]]
- ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32)
+ ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32)
; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1)
; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1)
@@ -7267,14 +7511,13 @@
; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32)
; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64)
; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
- ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32)
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]]
+ ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32)
; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1)
; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1)
; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]]
- ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32)
+ ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32)
; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1)
; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1)
@@ -7283,14 +7526,13 @@
; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32)
; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64)
; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
- ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]]
+ ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32)
; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64)
; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1)
; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1)
; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]]
- ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32)
+ ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32)
; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64)
; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1)
; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1)
@@ -7299,17 +7541,17 @@
; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32)
; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64)
; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY
[[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7320,6 +7562,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7463,6 +7706,7 @@ ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7545,6 +7789,7 @@ ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7555,6 +7800,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7616,6 +7862,7 @@ ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7647,6 
+7894,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7657,6 +7905,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7667,6 +7916,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7698,6 +7948,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7708,6 +7959,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7718,6 +7970,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7749,6 +8002,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7759,6 +8013,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7769,6 +8024,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x 
s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7801,6 +8057,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v2s128_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -7812,6 +8069,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s128_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -7823,6 +8081,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v2s128_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -7867,8 +8126,7 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -7878,14 +8136,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY6]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY6]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -7895,14 +8152,13 @@ ; 
SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY9]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -7912,14 +8168,13 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY11]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY10]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[COPY12]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -7930,14 +8185,13 @@ ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY14]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY14]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY13]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: 
[[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY15]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY15]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -7946,14 +8200,13 @@ ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY17]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY17]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY16]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY18]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY18]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) @@ -7962,14 +8215,13 @@ ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY20]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY20]](s32) ; SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY19]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY21]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY21]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) @@ -7978,17 +8230,17 @@ ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] - ; SI-NEXT: 
[[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY23]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY22]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY24]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY24]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8000,6 +8252,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8144,6 +8397,7 @@ ; VI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8228,6 +8482,7 @@ ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8239,6 +8494,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8301,6 +8557,7 @@ ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8334,6 +8591,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8345,6 +8603,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8356,6 +8615,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8389,6 +8649,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8400,6 +8661,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8411,6 +8673,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8444,6 +8707,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8455,6 +8719,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8466,6 +8731,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: 
test_store_global_s256_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8499,6 +8765,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_s256_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8510,6 +8777,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_s256_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8521,6 +8789,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_s256_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8553,6 +8822,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v8s32_align32 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI-NEXT: {{ $}} @@ -8563,6 +8833,7 @@ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v8s32_align32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: {{ $}} @@ -8573,6 +8844,7 @@ ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v8s32_align32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9-NEXT: {{ $}} @@ -8618,8 +8890,7 @@ ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) @@ -8629,14 +8900,13 @@ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY7]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY7]](s32) ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY8]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR3]], [[COPY8]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD4]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) @@ -8646,14 +8916,13 @@ ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY10]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY10]](s32) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY11]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[COPY11]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD8]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) @@ -8663,14 +8932,13 @@ ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY13]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY13]](s32) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR10]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY14]](s32) + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], 
[[COPY14]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD12]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR9]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) @@ -8680,14 +8948,13 @@ ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY16]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY16]](s32) ; SI-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY15]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR13]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C3]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY17]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR12]], [[COPY17]](s32) ; SI-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD16]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) @@ -8696,14 +8963,13 @@ ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY18]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY19]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY19]](s32) ; SI-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY18]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR16]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C3]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY20]](s32) + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR15]], [[COPY20]](s32) ; SI-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD20]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) @@ -8712,14 +8978,13 @@ ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY22]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY22]](s32) ; 
SI-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY21]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR19]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C3]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY23]](s32) + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LSHR18]], [[COPY23]](s32) ; SI-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD24]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) @@ -8728,14 +8993,13 @@ ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY24]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY25]](s32) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY25]](s32) ; SI-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY24]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI-NEXT: G_STORE [[LSHR22]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C3]] - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY26]](s32) + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR21]], [[COPY26]](s32) ; SI-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD28]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR21]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) @@ -8745,17 +9009,17 @@ ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY28]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] + ; SI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY28]](s32) ; SI-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY27]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR25]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; SI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR24]], [[C3]] - ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY29]](s32) + ; SI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LSHR24]], [[COPY29]](s32) ; SI-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD32]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; SI-NEXT: G_STORE [[LSHR26]](s32), [[PTR_ADD34]](p1) :: (store 
(s8) into unknown-address + 35, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align1 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -8775,6 +9039,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -8937,6 +9202,7 @@ ; VI-NEXT: G_STORE [[LSHR24]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; VI-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR26]](s16) ; VI-NEXT: G_STORE [[ANYEXT17]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9040,6 +9306,7 @@ ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9059,6 +9326,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9129,6 +9397,7 @@ ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; VI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9181,6 +9450,7 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9200,6 +9470,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9219,6 +9490,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, 
addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9271,6 +9543,7 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9290,6 +9563,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9309,6 +9583,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} @@ -9361,6 +9636,7 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; CI-NEXT: {{ $}} @@ -9380,6 +9656,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; VI-NEXT: {{ $}} @@ -9399,6 +9676,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -22,6 +22,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -45,6 +46,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i64 ; VI: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -68,6 +70,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -91,6 +94,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -114,6 +118,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) + ; ; VI-LABEL: name: test_store_global_p3 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -137,6 +142,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI-NEXT: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -160,6 +166,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -188,6 +195,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -212,6 +220,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -237,6 +246,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -269,6 +279,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s16_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -301,6 +312,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -332,6 +344,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) 
:: (store (s16) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -376,11 +389,11 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s64_to_s32_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -424,6 +437,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) ; SI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s128_to_s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -449,6 +463,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_s128_to_s8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -476,6 +491,7 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] ; SI-NEXT: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -503,6 +519,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -527,6 +544,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -557,6 +575,7 @@ ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; ; VI-LABEL: name: test_store_global_96 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -583,6 +602,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_i128 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -607,6 +627,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = 
COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI-NEXT: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -631,8 +652,8 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -649,14 +670,15 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -688,8 +710,8 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -699,14 +721,15 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -733,8 +756,8 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; 
SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -744,14 +767,15 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -779,8 +803,8 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -813,6 +837,7 @@ ; SI-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -820,8 +845,8 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -870,8 +895,8 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -897,6 +922,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), 
[[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -904,8 +930,8 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -949,8 +975,8 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) @@ -976,6 +1002,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -983,8 +1010,8 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -1053,11 +1080,11 @@ ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY3]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1082,8 +1109,8 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C5]] @@ -1141,6 +1168,7 @@ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1201,6 +1229,7 @@ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI-NEXT: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) + ; ; VI-LABEL: name: test_store_global_v4s8_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1241,6 +1270,7 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s8>) = G_IMPLICIT_DEF ; SI-NEXT: G_STORE [[DEF]](<2 x s8>), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v2s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -1266,6 +1296,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} @@ -1293,6 +1324,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) ; SI-NEXT: G_STORE [[TRUNC]](<3 x s8>), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; VI-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -126,8 +126,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -153,8 +153,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -355,8 +355,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](<2 x s128>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s128) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s128) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -403,8 +403,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s96) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s96) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -139,22 +139,20 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -190,16 +188,14 @@ 
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND8]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) @@ -209,26 +205,25 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: 
[[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -271,28 +266,24 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND8]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[AND10]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND11]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD2]](s32), [[AND6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD3]](s32), [[AND7]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) ; CHECK-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -302,11 +293,11 @@ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -25,6 +25,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -39,6 +40,7 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -82,6 +84,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -96,6 +99,7 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -147,17 +151,14 @@ ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; 
GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -179,13 +180,11 @@ ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -250,6 +249,7 @@ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: uaddsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -260,6 +260,7 @@ ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -308,13 +309,11 @@ ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -337,6 +336,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -392,24 +392,21 @@ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL 
[[LSHR4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -447,13 +444,13 @@ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -548,19 +545,15 @@ ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; 
GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -601,6 +594,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -634,6 +628,7 @@ ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: uaddsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -641,6 +636,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; ; GFX9-LABEL: name: uaddsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -676,6 +672,7 @@ ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -687,6 +684,7 @@ ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: uaddsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -723,6 +721,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: uaddsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -736,6 +735,7 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDE1]](s1), [[C]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: uaddsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -783,6 +783,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: uaddsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -805,6 +806,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[UADDE3]](s1), [[C]], [[MV1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: 
name: uaddsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -39,6 +39,7 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX8-LABEL: name: test_udiv_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -67,6 +68,7 @@ ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX9-LABEL: name: test_udiv_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -95,6 +97,7 @@ ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX10-LABEL: name: test_udiv_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -185,6 +188,7 @@ ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_udiv_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -235,6 +239,7 @@ ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_udiv_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -285,6 +290,7 @@ ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_udiv_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -497,6 +503,7 @@ ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_udiv_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -647,6 +654,7 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_udiv_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -797,6 +805,7 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_udiv_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -1249,6 +1258,7 @@ ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX6-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_udiv_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1539,6 +1549,7 @@ ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_udiv_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1829,6 +1840,7 @@ ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_udiv_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2163,6 +2175,7 @@ ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX8-LABEL: name: test_udiv_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2195,6 +2208,7 @@ ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX9-LABEL: name: test_udiv_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2227,6 +2241,7 @@ ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX10-LABEL: name: test_udiv_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2309,33 +2324,32 @@ ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], 
[[LSHR1]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_udiv_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2371,33 +2385,32 @@ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], 
[[LSHR1]] ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT2]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_udiv_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2434,30 +2447,29 @@ ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_udiv_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2494,25 +2506,23 @@ ; GFX10-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD3]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[LSHR1]] ; GFX10-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[LSHR1]] ; GFX10-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[LSHR1]] ; GFX10-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] ; GFX10-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT5]](s32) @@ -2561,6 +2571,7 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX8-LABEL: name: test_udiv_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2592,6 +2603,7 @@ ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX9-LABEL: name: test_udiv_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2623,6 +2635,7 @@ ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: 
$vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX10-LABEL: name: test_udiv_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2700,6 +2713,7 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX8-LABEL: name: test_udiv_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2731,6 +2745,7 @@ ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX9-LABEL: name: test_udiv_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2762,6 +2777,7 @@ ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT2]](s32) + ; ; GFX10-LABEL: name: test_udiv_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2961,6 +2977,7 @@ ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_udiv_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -3114,6 +3131,7 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_udiv_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3267,6 +3285,7 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_udiv_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; VI-LABEL: name: test_umax_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; GFX9-LABEL: name: test_umax_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -52,6 +54,7 @@ ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; VI-LABEL: name: test_umax_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -60,6 +63,7 @@ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_umax_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -90,6 +94,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[COPY1]], [[C]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; VI-LABEL: name: test_umax_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -100,6 +105,7 @@ ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umax_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -135,26 +141,28 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; VI-LABEL: name: test_umax_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umax_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -186,6 +194,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; VI-LABEL: name: test_umax_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -196,6 +205,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) + ; ; GFX9-LABEL: name: test_umax_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -232,6 +242,7 @@ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_umax_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -243,6 +254,7 @@ ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umax_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -278,6 +290,7 @@ ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_umax_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -290,6 +303,7 @@ ; VI-NEXT: 
[[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_umax_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -328,15 +342,12 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_umax_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -359,6 +370,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_umax_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -396,14 +408,13 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_umax_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -432,6 +443,7 @@ ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -484,27 +496,20 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; 
SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[LSHR1]], [[LSHR3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMAX1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMAX]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMAX3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMAX2]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_umax_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -545,6 +550,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; VI-LABEL: name: test_umin_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; GFX9-LABEL: name: test_umin_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -52,6 +54,7 @@ ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; VI-LABEL: name: test_umin_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -60,6 +63,7 @@ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] ; VI-NEXT: 
$vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: test_umin_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -90,6 +94,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; VI-LABEL: name: test_umin_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -100,6 +105,7 @@ ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umin_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -135,26 +141,28 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; VI-LABEL: name: test_umin_s8 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umin_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -186,6 +194,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; VI-LABEL: name: test_umin_s17 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -196,6 +205,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) + ; ; GFX9-LABEL: name: test_umin_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -232,6 +242,7 @@ ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_umin_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -243,6 +254,7 @@ ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umin_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -278,6 +290,7 @@ ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 
= COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_umin_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -290,6 +303,7 @@ ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_umin_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -328,15 +342,12 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_umin_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -359,6 +370,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_umin_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -396,14 +408,13 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_umin_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -432,6 +443,7 @@ ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -484,27 +496,20 @@ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI-NEXT: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR2]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[LSHR1]], [[LSHR3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UMIN1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UMIN]], [[SHL]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UMIN3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UMIN2]], [[SHL1]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_umin_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -545,6 +550,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -15,6 +15,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](s32) + ; ; GFX9-LABEL: name: test_umulh_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -45,6 +46,7 @@ ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umulh_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -98,6 +100,7 @@ ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: 
name: test_umulh_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -197,6 +200,7 @@ ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_umulh_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -277,8 +281,8 @@ ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX9-LABEL: name: test_umulh_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -290,8 +294,7 @@ ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -312,33 +315,34 @@ ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX9-LABEL: name: test_umulh_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 @@ -372,17 +376,15 @@ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] ; GFX8-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[LSHR2]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umulh_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -400,9 +402,7 @@ ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -427,8 +427,8 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -448,21 +448,19 @@ ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C1]](s16) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], 
[[SHL1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_umulh_v3s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -503,11 +501,10 @@ ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND8]], [[C1]](s16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[LSHR1]], [[SHL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) @@ -546,8 +543,8 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -560,12 +557,11 @@ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[LSHR1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umulh_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -635,8 +631,8 @@ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -661,8 +657,8 @@ ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR 
[[MUL3]], [[C4]](s16) - ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] @@ -677,6 +673,7 @@ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; ; GFX9-LABEL: name: test_umulh_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir @@ -20,6 +20,7 @@ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[MUL]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_umulo_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -69,6 +70,7 @@ ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umulo_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -84,8 +86,8 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UMULH1]](s32), [[C]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MUL]](s32), [[MUL1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] @@ -149,6 +151,7 @@ ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[ZEXT5]](s64) + ; ; GFX9-LABEL: name: test_umulo_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -283,14 +286,15 @@ ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; ; GFX9-LABEL: name: test_umulo_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -368,8 +372,8 @@ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV26]](s32) ; 
GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MV2]](s64), [[C]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV1]](s64), [[MV3]](s64) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT2]], [[C1]] ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT3]], [[C1]] @@ -409,6 +413,7 @@ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_umulo_s24 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -461,6 +466,7 @@ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_umulo_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -508,6 +514,7 @@ ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[AND3]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_umulo_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -572,10 +579,10 @@ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[AND11]](s32) + ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND10]](s32), [[LSHR]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s32>) ; GFX8-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_umulo_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -594,8 +601,8 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] @@ -640,8 +647,8 @@ ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[AND4]] ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] @@ -654,6 +661,7 @@ ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; GFX8-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; ; GFX9-LABEL: name: test_umulo_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -672,8 +680,8 @@ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND3]], 
[[AND4]] ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C]] ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[MUL1]](s32), [[AND5]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] @@ -740,22 +748,21 @@ ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]] + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX8-NEXT: $vgpr1 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_umulo_v4s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -782,18 +789,16 @@ ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[AND6]] - ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[AND8]] - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] - ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL]] - ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LSHR2]], [[LSHR5]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C3]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[MUL1]], [[C3]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL]] + ; GFX9-NEXT: 
[[AND9:%[0-9]+]]:_(s32) = G_AND [[MUL2]], [[C3]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL3]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -263,10 +263,9 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LSHR1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) @@ -335,13 +334,11 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) @@ -389,13 +386,11 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = 
G_LSHR [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[LSHR]](s32) @@ -451,29 +446,23 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY4]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY5]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY5]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY6]](s32) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY7]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY8]](s32) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY9]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C3]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[C4]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY9]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C3]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[C4]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LSHR2]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[LSHR3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -36,6 +36,7 @@ ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 
= COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -61,6 +62,7 @@ ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -86,6 +88,7 @@ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -168,6 +171,7 @@ ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_urem_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -213,6 +217,7 @@ ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_urem_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -258,6 +263,7 @@ ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX10-LABEL: name: test_urem_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -461,6 +467,7 @@ ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_urem_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -607,6 +614,7 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_urem_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -753,6 +761,7 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_urem_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -1194,6 +1203,7 @@ ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: test_urem_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -1477,6 +1487,7 @@ ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_urem_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -1760,6 +1771,7 @@ ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX10-LABEL: name: test_urem_v2s64 ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX10-NEXT: {{ $}} @@ -2084,6 +2096,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX8-LABEL: name: test_urem_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2113,6 +2126,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX9-LABEL: name: test_urem_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2142,6 +2156,7 @@ ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) + ; ; GFX10-LABEL: name: test_urem_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2218,31 +2233,30 @@ ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX6-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX6-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX6-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; 
GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_urem_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2275,31 +2289,30 @@ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX8-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX8-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX8-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; 
GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT3]], [[C1]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_urem_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2333,28 +2346,27 @@ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX9-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[LSHR1]](s32) ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX9-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX9-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; ; GFX10-LABEL: name: test_urem_v2s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2388,24 +2400,22 @@ ; GFX10-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX10-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) - ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) + ; GFX10-NEXT: [[UITOFP1:%[0-9]+]]:_(s32) 
= G_UITOFP [[LSHR1]](s32) ; GFX10-NEXT: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX10-NEXT: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX10-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[LSHR1]] ; GFX10-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] ; GFX10-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] - ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] - ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] - ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] - ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX10-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[ADD1]] + ; GFX10-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[LSHR1]] + ; GFX10-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[MUL3]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[LSHR1]] + ; GFX10-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[LSHR1]] ; GFX10-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] - ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[LSHR1]] + ; GFX10-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[LSHR1]] ; GFX10-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32) ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) @@ -2450,6 +2460,7 @@ ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2478,6 +2489,7 @@ ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2506,6 +2518,7 @@ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s7 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2577,6 +2590,7 @@ ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX8-LABEL: name: test_urem_s17 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -2605,6 +2619,7 @@ ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX9-LABEL: name: test_urem_s17 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -2633,6 +2648,7 
@@ ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) + ; ; GFX10-LABEL: name: test_urem_s17 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -2825,6 +2841,7 @@ ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX8-LABEL: name: test_urem_s33 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -2974,6 +2991,7 @@ ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX9-LABEL: name: test_urem_s33 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -3123,6 +3141,7 @@ ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) + ; ; GFX10-LABEL: name: test_urem_s33 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -27,14 +27,15 @@ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -46,14 +47,15 @@ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s7 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -96,14 +98,15 @@ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -115,14 +118,15 @@ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) @@ -174,16 +178,14 @@ ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) - ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: ushlsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -193,8 +195,8 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -213,12 +215,11 @@ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: 
[[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -228,8 +229,8 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) @@ -248,10 +249,8 @@ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR5]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR3]], [[SHL4]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -288,6 +287,7 @@ ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) + ; ; GFX8-LABEL: name: ushlsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -302,6 +302,7 @@ ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -350,19 +351,17 @@ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR1]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -392,6 +391,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -441,51 +441,46 @@ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[LSHR1]](s32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[LSHR1]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] - ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL3]] ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[LSHR2]](s32) + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[LSHR2]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL5]] ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR8]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -533,13 +528,13 @@ ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -623,39 +618,34 @@ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6-NEXT: 
[[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[LSHR2]](s32) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[LSHR2]](s32) ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND1]](s32) ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[LSHR3]](s32) + ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[LSHR3]](s32) ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR11]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: ushlsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -709,6 +699,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: ushlsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -777,6 +768,7 @@ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] ; GFX6-NEXT: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX8-LABEL: name: ushlsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -788,6 +780,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) + ; ; GFX9-LABEL: name: ushlsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -829,6 +822,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: ushlsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -847,6 +841,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: ushlsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -889,6 +884,7 @@ ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: ushlsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -901,6 +897,7 @@ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: ushlsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -945,6 +942,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: ushlsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -965,6 +963,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: ushlsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -139,22 +139,20 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] + ; CHECK-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -190,16 +188,14 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND8]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) @@ -209,26 +205,25 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -271,28 +266,24 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[AND4]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND6]], [[AND7]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND8]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[AND9]], [[AND10]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND11]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], 
[[LSHR2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB1]](s32), [[AND3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND4]], [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB2]](s32), [[AND6]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB3]](s32), [[AND7]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND5]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND8]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND11]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND7]](s32) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) @@ -302,11 +293,11 @@ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -23,6 +23,7 @@ ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s7 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -37,6 +38,7 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s7 ; GFX9: liveins: 
$vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -78,6 +80,7 @@ ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -92,6 +95,7 @@ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -140,17 +144,14 @@ ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[COPY2]](s32) ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: usubsat_v2s8 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -172,13 +173,11 @@ ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[LSHR3]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR2]], [[SHL4]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -241,6 +240,7 @@ ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) + ; ; GFX8-LABEL: name: usubsat_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -251,6 +251,7 @@ ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: usubsat_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -296,13 +297,11 @@ ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[LSHR3]], [[C1]] - ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL4]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -325,6 +324,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -376,24 +376,21 @@ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]] ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} @@ -431,13 +428,13 @@ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; 
GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -527,19 +524,15 @@ ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL8]] ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL9]] ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -580,6 +573,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -611,6 +605,7 @@ ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; ; GFX8-LABEL: name: usubsat_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -618,6 +613,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) + ; ; GFX9-LABEL: name: usubsat_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -650,6 +646,7 @@ ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -661,6 +658,7 @@ ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: 
name: usubsat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -697,6 +695,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX8-LABEL: name: usubsat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -710,6 +709,7 @@ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBE1]](s1), [[C]], [[MV]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; ; GFX9-LABEL: name: usubsat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -757,6 +757,7 @@ ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: {{ $}} @@ -779,6 +780,7 @@ ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[USUBE3]](s1), [[C]], [[MV1]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -472,10 +472,9 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) @@ -533,10 +532,9 @@ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -545,10 +543,9 @@ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) @@ -563,20 +560,17 @@ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -28,8 +28,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s32) = COPY $vgpr0 @@ -138,8 +138,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = G_ZEXT %0 @@ -163,9 +162,8 @@ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -191,10 +189,8 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32), [[LSHR1]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = G_ZEXT %0 @@ -275,8 +271,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s16) %0:_(s32) = COPY $vgpr0 @@ -654,8 +650,8 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] @@ -665,13 +661,11 @@ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[C5]], [[C4]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C5]], [[SHL2]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) @@ -715,67 +709,61 @@ ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL3]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV]], [[SHL2]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[SHL3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV]](s64), 0 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C3]](s64) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND2]](s64) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C2]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL1]] + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL4]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C1]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C1]] + ; 
CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL5]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL1]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL1]] + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32) - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND5]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384) ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -1117,8 +1117,7 @@ ; SOFT-NOT: G_FCMP ; For soft float we just need to return a '-1' constant, but the truncation ; to 1 bit is converted by the combiner to the following masking sequence. - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SOFT-NOT: G_FCMP ; CHECK: $r0 = COPY [[REXT]] ... @@ -1152,8 +1151,7 @@ ; SOFT-NOT: G_FCMP ; For soft float we just need to return a '0' constant, but the truncation ; to 1 bit is converted by the combiner to the following masking sequence. - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; SOFT-NOT: G_FCMP ; CHECK: $r0 = COPY [[REXT]] ... @@ -1825,8 +1823,7 @@ ; SOFT-NOT: G_FCMP ; The result needs to be truncated, and the combiner turns the truncation ; into the following masking sequence. - ; SOFT: [[MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[MASK]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) $r0 = COPY %7(s32) @@ -1870,11 +1867,9 @@ ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(false), [[X]](s64), [[Y]] ; HARD: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) ; SOFT-NOT: G_FCMP - ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; The result needs to be truncated, and the combiner turns the truncation ; into the following masking sequence. 
- ; SOFT: [[REXT:%[0-9]+]]:_(s32) = COPY [[R]] - ; SOFT-NOT: G_FCMP ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) $r0 = COPY %7(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir @@ -24,11 +24,12 @@ ; MIPS32-LABEL: name: add_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_ADD %0, %1 @@ -46,14 +47,15 @@ ; MIPS32-LABEL: name: add_i8_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -74,13 +76,14 @@ ; MIPS32-LABEL: name: add_i8_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -101,11 +104,12 @@ ; MIPS32-LABEL: name: add_i8_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) 
%3:_(s32) = COPY $a1 @@ -126,14 +130,15 @@ ; MIPS32-LABEL: name: add_i16_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -154,13 +159,14 @@ ; MIPS32-LABEL: name: add_i16_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -181,11 +187,12 @@ ; MIPS32-LABEL: name: add_i16_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -206,19 +213,18 @@ ; MIPS32-LABEL: name: add_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] - ; MIPS32: $v0 = COPY [[ADD2]](s32) - ; MIPS32: $v1 = COPY [[ADD]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) 
= COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[ADD2]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %3(s32), %2(s32) @@ -247,44 +253,44 @@ ; MIPS32-LABEL: name: add_i128 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2) - ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[COPY]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[COPY1]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[LOAD1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[COPY2]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[LOAD2]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND2]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD4]](s32), [[C1]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] - ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND3]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[COPY3]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: $v0 = COPY [[ADD]](s32) - ; MIPS32: $v1 = COPY [[ADD2]](s32) - ; MIPS32: $a0 = COPY [[ADD4]](s32) - ; MIPS32: $a1 = COPY [[ADD6]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2) + ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[LOAD1]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]] + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[COPY2]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[LOAD2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND1]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD4]](s32), [[C]] + ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[COPY3]] + ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND3]] + ; MIPS32-NEXT: $v0 = COPY [[ADD]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD2]](s32) + ; MIPS32-NEXT: $a0 = COPY [[ADD4]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD6]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %4:_(s32) = COPY $a2 @@ -318,18 +324,18 @@ ; MIPS32-LABEL: name: uadd_with_overflow ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) - ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) - ; MIPS32: RetRA + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY1]] + ; MIPS32-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] + ; MIPS32-NEXT: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) + ; MIPS32-NEXT: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) + ; MIPS32-NEXT: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/bitwise.mir @@ -42,11 +42,12 @@ ; MIPS32-LABEL: name: and_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -67,11 +68,12 @@ ; MIPS32-LABEL: name: and_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -92,11 +94,12 @@ ; MIPS32-LABEL: name: and_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -117,11 +120,12 @@ ; MIPS32-LABEL: name: and_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_AND %1, %0 @@ -139,15 +143,16 @@ ; MIPS32-LABEL: name: and_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY]] - ; MIPS32: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: $v1 = COPY [[AND1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: $v1 = COPY [[AND1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -171,11 +176,12 @@ ; MIPS32-LABEL: name: or_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -196,11 +202,12 @@ ; MIPS32-LABEL: name: or_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -221,11 +228,12 @@ ; MIPS32-LABEL: name: or_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -246,11 +254,12 @@ ; MIPS32-LABEL: name: or_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_OR %1, %0 @@ -268,15 +277,16 @@ ; MIPS32-LABEL: name: or_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY]] - ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[OR]](s32) - ; MIPS32: $v1 = COPY [[OR1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[OR]](s32) + ; MIPS32-NEXT: $v1 = COPY [[OR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -300,11 +310,12 @@ ; MIPS32-LABEL: name: xor_i1 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -325,11 +336,12 @@ ; MIPS32-LABEL: name: xor_i8 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -350,11 +362,12 @@ ; MIPS32-LABEL: name: xor_i16 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -375,11 +388,12 @@ ; MIPS32-LABEL: name: xor_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_XOR %1, %0 @@ -397,15 +411,16 @@ ; MIPS32-LABEL: name: xor_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - 
; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY]] - ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[COPY1]] - ; MIPS32: $v0 = COPY [[XOR]](s32) - ; MIPS32: $v1 = COPY [[XOR1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[XOR]](s32) + ; MIPS32-NEXT: $v1 = COPY [[XOR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -429,11 +444,12 @@ ; MIPS32-LABEL: name: shl ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_SHL %0, %1 @@ -451,11 +467,12 @@ ; MIPS32-LABEL: name: ashr ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_ASHR %0, %1 @@ -473,11 +490,12 @@ ; MIPS32-LABEL: name: lshr ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_LSHR %0, %1 @@ -495,11 +513,12 @@ ; MIPS32-LABEL: name: lshr_i64_shift_amount ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 
= COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s64) = G_CONSTANT i64 1 %2:_(s32) = G_LSHR %0, %1 @@ -517,11 +536,12 @@ ; MIPS32-LABEL: name: shlv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_SHL %0, %1 @@ -539,11 +559,12 @@ ; MIPS32-LABEL: name: ashrv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_ASHR %0, %1 @@ -561,11 +582,12 @@ ; MIPS32-LABEL: name: lshrv ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_LSHR %0, %1 @@ -583,12 +605,12 @@ ; MIPS32-LABEL: name: shl_i16 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[SHL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[SHL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %1(s32) %2:_(s16) = G_CONSTANT i16 2 @@ -608,15 +630,15 @@ ; MIPS32-LABEL: name: ashr_i8 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) - ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[ASHR1]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32) + ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %1(s32) %2:_(s8) = G_CONSTANT i8 2 @@ -636,14 +658,14 @@ ; MIPS32-LABEL: name: lshr_i16 ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) - ; MIPS32: $v0 = COPY [[LSHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[LSHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %1:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %1(s32) %2:_(s16) = G_CONSTANT i16 2 @@ -663,30 +685,27 @@ ; MIPS32-LABEL: name: shl_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY2]](s32) - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SUB1]](s32) - ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; MIPS32: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[SUB]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[COPY1]], [[SELECT1]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[SUB1]](s32) + ; MIPS32-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] + ; MIPS32-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C1]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY1]], [[SELECT1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -710,32 +729,29 @@ ; MIPS32-LABEL: name: ashl_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C2]](s32) - ; MIPS32: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[SUB]](s32) - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[SELECT]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]] - ; MIPS32: $v0 = COPY [[SELECT1]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C2]](s32) 
+ ; MIPS32-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY]], [[SELECT]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -759,30 +775,27 @@ ; MIPS32-LABEL: name: lshr_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[COPY2]](s32) - ; MIPS32: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) - ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] - ; MIPS32: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; MIPS32: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[COPY]], [[SELECT]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; MIPS32: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[LSHR]], [[C1]] - ; MIPS32: $v0 = COPY [[SELECT1]](s32) - ; MIPS32: $v1 = COPY [[SELECT2]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[C]] + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY2]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY2]](s32), [[C1]] + ; MIPS32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[COPY2]](s32) + ; MIPS32-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY2]](s32) + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[SUB1]](s32) + ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] + ; MIPS32-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]] + ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[COPY]], [[SELECT]] + ; MIPS32-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C1]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[SELECT2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY 
$a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/constants.mir @@ -20,10 +20,10 @@ bb.1.entry: ; MIPS32-LABEL: name: any_i64 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: $v1 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: $v1 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %0:_(s64) = G_CONSTANT i64 -9223372036854775808 %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64) $v0 = COPY %2(s32) @@ -39,8 +39,8 @@ bb.1.entry: ; MIPS32-LABEL: name: any_i32 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = G_CONSTANT i32 -2147483648 $v0 = COPY %0(s32) RetRA implicit $v0 @@ -54,9 +54,9 @@ bb.1.entry: ; MIPS32-LABEL: name: signed_i16 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[COPY]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s16) = G_CONSTANT i16 -32768 %1:_(s32) = G_SEXT %0(s16) $v0 = COPY %1(s32) @@ -71,9 +71,9 @@ bb.1.entry: ; MIPS32-LABEL: name: signed_i8 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -128 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[COPY]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s8) = G_CONSTANT i8 -128 %1:_(s32) = G_SEXT %0(s8) $v0 = COPY %1(s32) @@ -88,8 +88,8 @@ bb.1.entry: ; MIPS32-LABEL: name: unsigned_i16 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s16) = G_CONSTANT i16 -32768 %1:_(s32) = G_ZEXT %0(s16) $v0 = COPY %1(s32) @@ -104,8 +104,8 @@ bb.1.entry: ; MIPS32-LABEL: name: unsigned_i8 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; MIPS32: $v0 = COPY [[C]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s8) = G_CONSTANT i8 -128 %1:_(s32) = G_ZEXT %0(s8) $v0 = COPY %1(s32) @@ -120,9 +120,8 @@ bb.1.entry: ; MIPS32-LABEL: name: i1_true ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s1) = G_CONSTANT i1 true %1:_(s32) = G_ZEXT %0(s1) $v0 = COPY %1(s32) @@ -137,9 +136,8 @@ bb.1.entry: ; MIPS32-LABEL: name: i1_false ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: $v0 = COPY [[COPY]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: $v0 = COPY [[C]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s1) = G_CONSTANT i1 false 
%1:_(s32) = G_ZEXT %0(s1)
$v0 = COPY %1(s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
@@ -10,10 +10,11 @@
; MIPS32-LABEL: name: ctlz_i32
; MIPS32: liveins: $a0
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
- ; MIPS32: $v0 = COPY [[CTLZ]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
+ ; MIPS32-NEXT: $v0 = COPY [[CTLZ]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $a0
%1:_(s32) = G_CTLZ %0(s32)
$v0 = COPY %1(s32)
@@ -30,20 +31,19 @@
; MIPS32-LABEL: name: ctlz_i64
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]]
- ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32)
- ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: $v1 = COPY [[C]](s32)
- ; MIPS32: RetRA implicit $v0, implicit $v1
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]]
+ ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32)
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[CTLZ1]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: $v1 = COPY [[C]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1
%1:_(s32) = COPY $a0
%2:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
@@ -10,16 +10,17 @@
; MIPS32-LABEL: name: cttz_i32
; MIPS32: liveins: $a0
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]]
- ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[CTLZ]]
- ; MIPS32: $v0 = COPY [[SUB]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]]
+ ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[CTLZ]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $a0
%1:_(s32) = G_CTTZ %0(s32)
$v0 = COPY %1(s32)
@@ -36,29 +37,28 @@
; MIPS32-LABEL: name: cttz_i64
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
- ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C1]]
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
- ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ]]
- ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]]
- ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
- ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
- ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
- ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
- ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ1]]
- ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: $v1 = COPY [[C]](s32)
- ; MIPS32: RetRA implicit $v0, implicit $v1
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
+ ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C1]]
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+ ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ]]
+ ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]]
+ ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
+ ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
+ ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
+ ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
+ ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ1]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[SUB1]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: $v1 = COPY [[C]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1
%1:_(s32) = COPY $a0
%2:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
@@ -79,22 +79,22 @@
; MIPS32-LABEL: name: ffs_i32_expansion
; MIPS32: liveins: $a0
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
- ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
- ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
- ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[SUB]], [[C]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
- ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[C1]], [[ADD1]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
+ ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+ ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
+ ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[SUB]], [[C]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD1]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $a0
%2:_(s32) = G_CONSTANT i32 1
%4:_(s32) = G_CONSTANT i32 0
@@ -116,43 +116,41 @@
; MIPS32-LABEL: name: ffs_i64_expansion
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[C1]](s32)
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
- ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C2]]
- ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C2]]
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
- ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
- ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C3]]
- ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
- ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
- ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
- ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
- ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ1]]
- ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
- ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]]
- ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]]
- ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]]
- ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
- ; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
- ; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
- ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]]
- ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]]
- ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]]
- ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
- ; MIPS32: $v0 = COPY [[UV]](s32)
- ; MIPS32: $v1 = COPY [[UV1]](s32)
- ; MIPS32: RetRA implicit $v0, implicit $v1
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[C1]](s32)
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
+ ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C2]]
+ ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C2]]
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+ ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; MIPS32-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
+ ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C3]]
+ ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
+ ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
+ ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
+ ; MIPS32-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
+ ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ1]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[SUB1]]
+ ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]]
+ ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]]
+ ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ICMP1]]
+ ; MIPS32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
+ ; MIPS32-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
+ ; MIPS32-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
+ ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]]
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]]
+ ; MIPS32-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s32), [[MV]], [[MV1]]
+ ; MIPS32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
+ ; MIPS32-NEXT: $v0 = COPY [[UV]](s32)
+ ; MIPS32-NEXT: $v1 = COPY [[UV1]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1
%1:_(s32) = COPY $a0
%2:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fptosi_and_fptoui.mir
@@ -31,28 +31,31 @@
; FP32-LABEL: name: f32toi64
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $f12 = COPY [[COPY]](s32)
- ; FP32: JAL &__fixsfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
- ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $v0 = COPY [[COPY1]](s32)
- ; FP32: $v1 = COPY [[COPY2]](s32)
- ; FP32: RetRA implicit $v0, implicit $v1
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $f12 = COPY [[COPY]](s32)
+ ; FP32-NEXT: JAL &__fixsfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
+ ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP32-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP32-NEXT: RetRA implicit $v0, implicit $v1
+ ;
; FP64-LABEL: name: f32toi64
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $f12 = COPY [[COPY]](s32)
- ; FP64: JAL &__fixsfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
- ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $v0 = COPY [[COPY1]](s32)
- ; FP64: $v1 = COPY [[COPY2]](s32)
- ; FP64: RetRA implicit $v0, implicit $v1
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $f12 = COPY [[COPY]](s32)
+ ; FP64-NEXT: JAL &__fixsfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
+ ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP64-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP64-NEXT: RetRA implicit $v0, implicit $v1
%0:_(s32) = COPY $f12
%1:_(s64) = G_FPTOSI %0(s32)
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
@@ -71,16 +74,19 @@
; FP32-LABEL: name: f32toi32
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: $v0 = COPY [[FPTOSI]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: $v0 = COPY [[FPTOSI]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32toi32
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: $v0 = COPY [[FPTOSI]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: $v0 = COPY [[FPTOSI]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s32) = G_FPTOSI %0(s32)
$v0 = COPY %1(s32)
@@ -97,22 +103,25 @@
; FP32-LABEL: name: f32toi16
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP32: $v0 = COPY [[ASHR]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32toi16
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP64: $v0 = COPY [[ASHR]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP64-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s16) = G_FPTOSI %0(s32)
%2:_(s32) = G_SEXT %1(s16)
@@ -130,22 +139,25 @@
; FP32-LABEL: name: f32toi8
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP32: $v0 = COPY [[ASHR]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32toi8
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP64: $v0 = COPY [[ASHR]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP64-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s8) = G_FPTOSI %0(s32)
%2:_(s32) = G_SEXT %1(s8)
@@ -163,28 +175,31 @@
; FP32-LABEL: name: f64toi64
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $d6 = COPY [[COPY]](s64)
- ; FP32: JAL &__fixdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1
- ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $v0 = COPY [[COPY1]](s32)
- ; FP32: $v1 = COPY [[COPY2]](s32)
- ; FP32: RetRA implicit $v0, implicit $v1
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $d6 = COPY [[COPY]](s64)
+ ; FP32-NEXT: JAL &__fixdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1
+ ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP32-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP32-NEXT: RetRA implicit $v0, implicit $v1
+ ;
; FP64-LABEL: name: f64toi64
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $d12_64 = COPY [[COPY]](s64)
- ; FP64: JAL &__fixdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1
- ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $v0 = COPY [[COPY1]](s32)
- ; FP64: $v1 = COPY [[COPY2]](s32)
- ; FP64: RetRA implicit $v0, implicit $v1
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $d12_64 = COPY [[COPY]](s64)
+ ; FP64-NEXT: JAL &__fixdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1
+ ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP64-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP64-NEXT: RetRA implicit $v0, implicit $v1
%0:_(s64) = COPY $d6
%1:_(s64) = G_FPTOSI %0(s64)
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
@@ -203,16 +218,19 @@
; FP32-LABEL: name: f64toi32
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: $v0 = COPY [[FPTOSI]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: $v0 = COPY [[FPTOSI]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64toi32
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: $v0 = COPY [[FPTOSI]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: $v0 = COPY [[FPTOSI]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s32) = G_FPTOSI %0(s64)
$v0 = COPY %1(s32)
@@ -229,22 +247,25 @@
; FP32-LABEL: name: f64toi16
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP32: $v0 = COPY [[ASHR]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64toi16
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP64: $v0 = COPY [[ASHR]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP64-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s16) = G_FPTOSI %0(s64)
%2:_(s32) = G_SEXT %1(s16)
@@ -262,22 +283,25 @@
; FP32-LABEL: name: f64toi8
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; FP32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP32: $v0 = COPY [[ASHR]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FP32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64toi8
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; FP64: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
- ; FP64: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; FP64: $v0 = COPY [[ASHR]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FP64-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[FPTOSI]], [[C]](s32)
+ ; FP64-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; FP64-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s8) = G_FPTOSI %0(s64)
%2:_(s32) = G_SEXT %1(s8)
@@ -295,28 +319,31 @@
; FP32-LABEL: name: f32tou64
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $f12 = COPY [[COPY]](s32)
- ; FP32: JAL &__fixunssfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
- ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $v0 = COPY [[COPY1]](s32)
- ; FP32: $v1 = COPY [[COPY2]](s32)
- ; FP32: RetRA implicit $v0, implicit $v1
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $f12 = COPY [[COPY]](s32)
+ ; FP32-NEXT: JAL &__fixunssfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
+ ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP32-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP32-NEXT: RetRA implicit $v0, implicit $v1
+ ;
; FP64-LABEL: name: f32tou64
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $f12 = COPY [[COPY]](s32)
- ; FP64: JAL &__fixunssfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
- ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $v0 = COPY [[COPY1]](s32)
- ; FP64: $v1 = COPY [[COPY2]](s32)
- ; FP64: RetRA implicit $v0, implicit $v1
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $f12 = COPY [[COPY]](s32)
+ ; FP64-NEXT: JAL &__fixunssfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit-def $v0, implicit-def $v1
+ ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP64-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP64-NEXT: RetRA implicit $v0, implicit $v1
%0:_(s32) = COPY $f12
%1:_(s64) = G_FPTOUI %0(s32)
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
@@ -335,34 +362,33 @@
; FP32-LABEL: name: f32tou32
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: $v0 = COPY [[SELECT]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32tou32
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: $v0 = COPY [[SELECT]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s32) = G_FPTOUI %0(s32)
$v0 = COPY %1(s32)
@@ -379,38 +405,37 @@
; FP32-LABEL: name: f32tou16
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP32: $v0 = COPY [[AND1]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP32-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32tou16
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP64: $v0 = COPY [[AND1]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP64-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s16) = G_FPTOUI %0(s32)
%2:_(s32) = G_ZEXT %1(s16)
@@ -428,38 +453,37 @@
; FP32-LABEL: name: f32tou8
; FP32: liveins: $f12
- ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP32: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP32: $v0 = COPY [[AND1]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP32-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f32tou8
; FP64: liveins: $f12
- ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
- ; FP64: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP64: $v0 = COPY [[AND1]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s32)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s32), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP64-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $f12
%1:_(s8) = G_FPTOUI %0(s32)
%2:_(s32) = G_ZEXT %1(s8)
@@ -477,28 +501,31 @@
; FP32-LABEL: name: f64tou64
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $d6 = COPY [[COPY]](s64)
- ; FP32: JAL &__fixunsdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1
- ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP32: $v0 = COPY [[COPY1]](s32)
- ; FP32: $v1 = COPY [[COPY2]](s32)
- ; FP32: RetRA implicit $v0, implicit $v1
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $d6 = COPY [[COPY]](s64)
+ ; FP32-NEXT: JAL &__fixunsdfdi, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit-def $v0, implicit-def $v1
+ ; FP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP32-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP32-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP32-NEXT: RetRA implicit $v0, implicit $v1
+ ;
; FP64-LABEL: name: f64tou64
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $d12_64 = COPY [[COPY]](s64)
- ; FP64: JAL &__fixunsdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1
- ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
- ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
- ; FP64: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; FP64: $v0 = COPY [[COPY1]](s32)
- ; FP64: $v1 = COPY [[COPY2]](s32)
- ; FP64: RetRA implicit $v0, implicit $v1
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $d12_64 = COPY [[COPY]](s64)
+ ; FP64-NEXT: JAL &__fixunsdfdi, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit-def $v0, implicit-def $v1
+ ; FP64-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $v0
+ ; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $v1
+ ; FP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; FP64-NEXT: $v0 = COPY [[COPY1]](s32)
+ ; FP64-NEXT: $v1 = COPY [[COPY2]](s32)
+ ; FP64-NEXT: RetRA implicit $v0, implicit $v1
%0:_(s64) = COPY $d6
%1:_(s64) = G_FPTOUI %0(s64)
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
@@ -517,34 +544,33 @@
; FP32-LABEL: name: f64tou32
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: $v0 = COPY [[SELECT]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64tou32
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: $v0 = COPY [[SELECT]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s32) = G_FPTOUI %0(s64)
$v0 = COPY %1(s32)
@@ -561,38 +587,37 @@
; FP32-LABEL: name: f64tou16
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP32: $v0 = COPY [[AND1]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP32-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64tou16
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP64: $v0 = COPY [[AND1]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP64-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s16) = G_FPTOUI %0(s64)
%2:_(s32) = G_ZEXT %1(s16)
@@ -610,38 +635,37 @@
; FP32-LABEL: name: f64tou8
; FP32: liveins: $d6
- ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP32: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP32: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP32: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP32: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP32: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; FP32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP32: $v0 = COPY [[AND1]](s32)
- ; FP32: RetRA implicit $v0
+ ; FP32-NEXT: {{ $}}
+ ; FP32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP32-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP32-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP32-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP32-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; FP32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP32-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP32-NEXT: RetRA implicit $v0
+ ;
; FP64-LABEL: name: f64tou8
; FP64: liveins: $d6
- ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
- ; FP64: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
- ; FP64: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
- ; FP64: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
- ; FP64: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
- ; FP64: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; FP64: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
- ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
- ; FP64: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; FP64: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C2]]
- ; FP64: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[FPTOSI]], [[XOR]]
- ; FP64: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; FP64: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C3]]
- ; FP64: $v0 = COPY [[AND1]](s32)
- ; FP64: RetRA implicit $v0
+ ; FP64-NEXT: {{ $}}
+ ; FP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+ ; FP64-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
+ ; FP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x41E0000000000000
+ ; FP64-NEXT: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[COPY]], [[C]]
+ ; FP64-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FSUB]](s64)
+ ; FP64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; FP64-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[FPTOSI1]], [[C1]]
+ ; FP64-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ult), [[COPY]](s64), [[C]]
+ ; FP64-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s32), [[FPTOSI]], [[XOR]]
+ ; FP64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; FP64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]]
+ ; FP64-NEXT: $v0 = COPY [[AND]](s32)
+ ; FP64-NEXT: RetRA implicit $v0
%0:_(s64) = COPY $d6
%1:_(s8) = G_FPTOUI %0(s64)
%2:_(s32) = G_ZEXT %1(s8)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
@@ -28,11 +28,12 @@
; MIPS32-LABEL: name: ne_i32
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%0:_(s32) = COPY $a0
%1:_(s32) = COPY $a1
%2:_(s1) = G_ICMP intpred(ne), %0(s32), %1
@@ -51,11 +52,12 @@
; MIPS32-LABEL: name: eq_ptr
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(s1) = G_ICMP intpred(eq), %0(p0), %1
@@ -74,14 +76,15 @@
; MIPS32-LABEL: name: ult_i8
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
- ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%0:_(s8) = G_TRUNC %2(s32)
%3:_(s32) = COPY $a1
@@ -102,16 +105,17 @@
; MIPS32-LABEL: name: slt_i16
; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
- ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
- ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; MIPS32-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+ ; MIPS32-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%0:_(s16) = G_TRUNC %2(s32)
%3:_(s32) = COPY $a1
@@ -132,17 +136,18 @@
; MIPS32-LABEL: name: eq_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
- ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
- ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
+ ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
+ ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -165,17 +170,18 @@
; MIPS32-LABEL: name: ne_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
- ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
- ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]]
- ; MIPS32: $v0 = COPY [[ICMP]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
+ ; MIPS32-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
+ ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]]
+ ; MIPS32-NEXT: $v0 = COPY [[ICMP]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -198,18 +204,17 @@
; MIPS32-LABEL: name: sgt_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -232,18 +237,17 @@
; MIPS32-LABEL: name: sge_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -266,18 +270,17 @@
; MIPS32-LABEL: name: slt_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -300,18 +303,17 @@
; MIPS32-LABEL: name: sle_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
- ; MIPS32: $v0 = COPY [[SELECT]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]]
+ ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
+ ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
%2:_(s32) = COPY $a0
%3:_(s32) = COPY $a1
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -334,18 +336,17 @@
; MIPS32-LABEL: name: ugt_i64
; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ;
MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -368,18 +369,17 @@ ; MIPS32-LABEL: name: uge_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -402,18 +402,17 @@ ; MIPS32-LABEL: name: ult_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = 
G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -436,18 +435,17 @@ ; MIPS32-LABEL: name: ule_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/jump_table_and_brjt.mir @@ -70,69 +70,84 @@ body: | ; MIPS32-LABEL: name: mod4_0_to_11 ; MIPS32: bb.0.entry: - ; MIPS32: successors: %bb.6(0x40000000), %bb.1(0x40000000) - ; MIPS32: liveins: $a0 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; MIPS32: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C6]] - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY2]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; MIPS32: G_BRCOND [[AND]](s32), %bb.6 - ; MIPS32: bb.1.entry: - ; MIPS32: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) - ; MIPS32: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 - ; MIPS32: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[COPY1]](s32) - 
; MIPS32: bb.2.sw.bb: - ; MIPS32: $v0 = COPY [[C4]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.3.sw.bb1: - ; MIPS32: $v0 = COPY [[C3]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.4.sw.bb2: - ; MIPS32: $v0 = COPY [[C2]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.5.sw.bb3: - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.6.sw.default: - ; MIPS32: successors: %bb.7(0x80000000) - ; MIPS32: bb.7.sw.epilog: - ; MIPS32: successors: %bb.13(0x40000000), %bb.8(0x40000000) - ; MIPS32: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C7]] - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY3]](s32), [[COPY4]] - ; MIPS32: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C8]] - ; MIPS32: G_BRCOND [[AND1]](s32), %bb.13 - ; MIPS32: bb.8.sw.epilog: - ; MIPS32: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) - ; MIPS32: [[JUMP_TABLE1:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.1 - ; MIPS32: G_BRJT [[JUMP_TABLE1]](p0), %jump-table.1, [[COPY3]](s32) - ; MIPS32: bb.9.sw.bb4: - ; MIPS32: $v0 = COPY [[C4]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.10.sw.bb5: - ; MIPS32: $v0 = COPY [[C3]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.11.sw.bb6: - ; MIPS32: $v0 = COPY [[C2]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.12.sw.bb7: - ; MIPS32: $v0 = COPY [[C1]](s32) - ; MIPS32: RetRA implicit $v0 - ; MIPS32: bb.13.sw.default8: - ; MIPS32: $v0 = COPY [[C5]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000) + ; MIPS32-NEXT: liveins: $a0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; MIPS32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C6]] + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY2]] + ; MIPS32-NEXT: G_BRCOND [[ICMP]](s32), %bb.6 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.1.entry: + ; MIPS32-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 + ; MIPS32-NEXT: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[COPY1]](s32) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.2.sw.bb: + ; MIPS32-NEXT: $v0 = COPY [[C4]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.3.sw.bb1: + ; MIPS32-NEXT: $v0 = COPY [[C3]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.4.sw.bb2: + ; MIPS32-NEXT: $v0 = COPY [[C2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.5.sw.bb3: + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.6.sw.default: + ; MIPS32-NEXT: 
successors: %bb.7(0x80000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.7.sw.epilog: + ; MIPS32-NEXT: successors: %bb.13(0x40000000), %bb.8(0x40000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C7]] + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY3]](s32), [[COPY4]] + ; MIPS32-NEXT: G_BRCOND [[ICMP1]](s32), %bb.13 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.8.sw.epilog: + ; MIPS32-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[JUMP_TABLE1:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.1 + ; MIPS32-NEXT: G_BRJT [[JUMP_TABLE1]](p0), %jump-table.1, [[COPY3]](s32) + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.9.sw.bb4: + ; MIPS32-NEXT: $v0 = COPY [[C4]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.10.sw.bb5: + ; MIPS32-NEXT: $v0 = COPY [[C3]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.11.sw.bb6: + ; MIPS32-NEXT: $v0 = COPY [[C2]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.12.sw.bb7: + ; MIPS32-NEXT: $v0 = COPY [[C1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: bb.13.sw.default8: + ; MIPS32-NEXT: $v0 = COPY [[C5]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 bb.1.entry: liveins: $a0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -25,11 +25,12 @@ ; MIPS32-LABEL: name: mul_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = G_MUL %0, %1 @@ -47,14 +48,15 @@ ; MIPS32-LABEL: name: mul_i8_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -75,13 +77,14 @@ ; 
MIPS32-LABEL: name: mul_i8_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -102,11 +105,12 @@ ; MIPS32-LABEL: name: mul_i8_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s8) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -127,14 +131,15 @@ ; MIPS32-LABEL: name: mul_i16_sext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) - ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; MIPS32: $v0 = COPY [[ASHR]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C]](s32) + ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -155,13 +160,14 @@ ; MIPS32-LABEL: name: mul_i16_zext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]] + ; MIPS32-NEXT: $v0 = COPY [[AND]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -182,11 +188,12 @@ ; MIPS32-LABEL: name: mul_i16_aext ; MIPS32: liveins: $a0, $a1 - ; MIPS32: 
[[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[COPY]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %2:_(s32) = COPY $a0 %0:_(s16) = G_TRUNC %2(s32) %3:_(s32) = COPY $a1 @@ -207,19 +214,20 @@ ; MIPS32-LABEL: name: mul_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: $v1 = COPY [[ADD1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -248,73 +256,66 @@ ; MIPS32-LABEL: name: mul_i128 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) - ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] - ; MIPS32: 
[[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; MIPS32: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] - ; MIPS32: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] - ; MIPS32: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] - ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] - ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C]] - ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]] - ; MIPS32: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] - ; MIPS32: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C]] - ; MIPS32: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[AND5]] - ; MIPS32: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]] - ; MIPS32: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] - ; MIPS32: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C]] - ; MIPS32: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[AND6]] - ; MIPS32: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] - ; MIPS32: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] - ; MIPS32: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] - ; MIPS32: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]] - ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] - ; MIPS32: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] - ; MIPS32: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] - ; MIPS32: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] - ; MIPS32: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] - ; MIPS32: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] - ; MIPS32: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] - ; MIPS32: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] - ; MIPS32: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] - ; MIPS32: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] - ; MIPS32: $v0 = COPY [[MUL]](s32) - ; MIPS32: $v1 = COPY [[ADD1]](s32) - ; MIPS32: $a0 = COPY [[ADD10]](s32) - ; MIPS32: $a1 = COPY [[ADD18]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; 
MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]] + ; MIPS32-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] + ; MIPS32-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] + ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] + ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[ICMP5]] + ; MIPS32-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]] + ; MIPS32-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ICMP6]] + ; MIPS32-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] + ; MIPS32-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] + ; MIPS32-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] + ; MIPS32-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]] + ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] + ; MIPS32-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] + ; MIPS32-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] + ; MIPS32-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] + ; MIPS32-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] + ; MIPS32-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] + ; 
MIPS32-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] + ; MIPS32-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] + ; MIPS32-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] + ; MIPS32-NEXT: $v0 = COPY [[MUL]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD1]](s32) + ; MIPS32-NEXT: $a0 = COPY [[ADD10]](s32) + ; MIPS32-NEXT: $a1 = COPY [[ADD18]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %4:_(s32) = COPY $a2 @@ -348,40 +349,35 @@ ; MIPS32-LABEL: name: umulh_i64 ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] - ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] - ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] - ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]] - ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] - ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]] - ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] - ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] - ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] - ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C]] - ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] - ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] - ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C]] - ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]] - ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD2]] - ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] - ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C]] - ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] - ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] - ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] - ; MIPS32: $v0 = COPY [[ADD6]](s32) - ; MIPS32: $v1 = COPY [[ADD8]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] + ; MIPS32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[MUL1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL1]] + ; MIPS32-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] + ; MIPS32-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], 
[[ICMP1]] + ; MIPS32-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY]] + ; MIPS32-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY1]] + ; MIPS32-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL2]], [[UMULH1]] + ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[UMULH1]] + ; MIPS32-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[UMULH2]] + ; MIPS32-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ICMP2]], [[ICMP3]] + ; MIPS32-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD2]] + ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[ADD2]] + ; MIPS32-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ICMP4]] + ; MIPS32-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[COPY3]], [[COPY1]] + ; MIPS32-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD7]] + ; MIPS32-NEXT: $v0 = COPY [[ADD6]](s32) + ; MIPS32-NEXT: $v1 = COPY [[ADD8]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) @@ -405,20 +401,20 @@ ; MIPS32-LABEL: name: umul_with_overflow ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C1]] - ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) - ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) - ; MIPS32: RetRA + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 + ; MIPS32-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]] + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]] + ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] + ; MIPS32-NEXT: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) + ; MIPS32-NEXT: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) + ; MIPS32-NEXT: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir @@ -22,14 +22,15 @@ ; MIPS32-LABEL: name: select_i8 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a1 @@ -52,14 +53,15 @@ ; MIPS32-LABEL: name: select_i16 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a1 @@ -82,14 +84,15 @@ ; MIPS32-LABEL: name: select_i32 ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(s32) = COPY $a1 @@ -109,14 +112,15 @@ ; MIPS32-LABEL: name: select_ptr ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $v0 = COPY [[SELECT]](p0) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; 
MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](p0) + ; MIPS32-NEXT: RetRA implicit $v0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(p0) = COPY $a1 @@ -136,17 +140,17 @@ ; MIPS32-LABEL: name: select_with_negation ; MIPS32: liveins: $a0, $a1, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ICMP]], [[C]] - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]] - ; MIPS32: $v0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] + ; MIPS32-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ICMP]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[XOR]](s32), [[COPY2]], [[COPY3]] + ; MIPS32-NEXT: $v0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s32) = COPY $a2 @@ -172,22 +176,23 @@ ; MIPS32-LABEL: name: select_i64 ; MIPS32: liveins: $a0, $a2, $a3 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 - ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) - ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[MV]], [[MV1]] - ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) - ; MIPS32: $v0 = COPY [[UV]](s32) - ; MIPS32: $v1 = COPY [[UV1]](s32) - ; MIPS32: RetRA implicit $v0, implicit $v1 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) + ; MIPS32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), 
[[LOAD1]](s32) + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[MV]], [[MV1]] + ; MIPS32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT]](s64) + ; MIPS32-NEXT: $v0 = COPY [[UV]](s32) + ; MIPS32-NEXT: $v1 = COPY [[UV1]](s32) + ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %4:_(s32) = COPY $a2 @@ -215,14 +220,15 @@ ; MIPS32-LABEL: name: select_float ; MIPS32: liveins: $a0, $a1, $a2 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] - ; MIPS32: $f0 = COPY [[SELECT]](s32) - ; MIPS32: RetRA implicit $f0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY1]], [[COPY2]] + ; MIPS32-NEXT: $f0 = COPY [[SELECT]](s32) + ; MIPS32-NEXT: RetRA implicit $f0 %3:_(s32) = COPY $a0 %0:_(s1) = G_TRUNC %3(s32) %1:_(s32) = COPY $a1 @@ -244,15 +250,16 @@ ; MIPS32-LABEL: name: select_double ; MIPS32: liveins: $d6, $d7 - ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 - ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] - ; MIPS32: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] - ; MIPS32: $d0 = COPY [[SELECT]](s64) - ; MIPS32: RetRA implicit $d0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 + ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; MIPS32-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; MIPS32-NEXT: $d0 = COPY [[SELECT]](s64) + ; MIPS32-NEXT: RetRA implicit $d0 %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir @@ -23,11 +23,12 @@ ; MIPS32-LABEL: name: sub_i32 ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; MIPS32: $v0 = COPY [[SUB]](s32) - ; MIPS32: RetRA implicit $v0 + ; MIPS32-NEXT: {{ $}} + ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; 
MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %0:_(s32) = COPY $a0
 %1:_(s32) = COPY $a1
 %2:_(s32) = G_SUB %0, %1
@@ -45,14 +46,15 @@
 ; MIPS32-LABEL: name: sub_i8_sext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32)
- ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; MIPS32: $v0 = COPY [[ASHR]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32)
+ ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s8) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -73,13 +75,14 @@
 ; MIPS32-LABEL: name: sub_i8_zext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
- ; MIPS32: $v0 = COPY [[AND]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
+ ; MIPS32-NEXT: $v0 = COPY [[AND]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s8) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -100,11 +103,12 @@
 ; MIPS32-LABEL: name: sub_i8_aext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: $v0 = COPY [[SUB]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s8) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -125,14 +129,15 @@
 ; MIPS32-LABEL: name: sub_i16_sext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32)
- ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
- ; MIPS32: $v0 = COPY [[ASHR]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; MIPS32-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SUB]], [[C]](s32)
+ ; MIPS32-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; MIPS32-NEXT: $v0 = COPY [[ASHR]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s16) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -153,13 +158,14 @@
 ; MIPS32-LABEL: name: sub_i16_zext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
- ; MIPS32: $v0 = COPY [[AND]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]]
+ ; MIPS32-NEXT: $v0 = COPY [[AND]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s16) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -180,11 +186,12 @@
 ; MIPS32-LABEL: name: sub_i16_aext
 ; MIPS32: liveins: $a0, $a1
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
- ; MIPS32: $v0 = COPY [[SUB]](s32)
- ; MIPS32: RetRA implicit $v0
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[COPY]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0
 %2:_(s32) = COPY $a0
 %0:_(s16) = G_TRUNC %2(s32)
 %3:_(s32) = COPY $a1
@@ -205,19 +212,18 @@
 ; MIPS32-LABEL: name: sub_i64
 ; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY3]], [[COPY1]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY3]](s32), [[COPY1]]
- ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; MIPS32: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]]
- ; MIPS32: $v0 = COPY [[SUB2]](s32)
- ; MIPS32: $v1 = COPY [[SUB]](s32)
- ; MIPS32: RetRA implicit $v0, implicit $v1
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY3]], [[COPY1]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY3]](s32), [[COPY1]]
+ ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY]]
+ ; MIPS32-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB2]](s32)
+ ; MIPS32-NEXT: $v1 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1
 %2:_(s32) = COPY $a0
 %3:_(s32) = COPY $a1
 %0:_(s64) = G_MERGE_VALUES %3(s32), %2(s32)
@@ -246,44 +252,44 @@
 ; MIPS32-LABEL: name: sub_i128
 ; MIPS32: liveins: $a0, $a1, $a2, $a3
- ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
- ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
- ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
- ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
- ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
- ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
- ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1)
- ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
- ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8)
- ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
- ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3)
- ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LOAD]], [[COPY]]
- ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[COPY]]
- ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LOAD1]], [[COPY1]]
- ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), [[LOAD1]]
- ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; MIPS32: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]]
- ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
- ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
- ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]]
- ; MIPS32: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LOAD2]], [[COPY2]]
- ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB3]](s32), [[LOAD2]]
- ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C]]
- ; MIPS32: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND2]]
- ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB3]](s32), [[C1]]
- ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]]
- ; MIPS32: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND3]]
- ; MIPS32: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LOAD3]], [[COPY3]]
- ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C]]
- ; MIPS32: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND4]]
- ; MIPS32: $v0 = COPY [[SUB]](s32)
- ; MIPS32: $v1 = COPY [[SUB2]](s32)
- ; MIPS32: $a0 = COPY [[SUB4]](s32)
- ; MIPS32: $a1 = COPY [[SUB6]](s32)
- ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1
+ ; MIPS32-NEXT: {{ $}}
+ ; MIPS32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+ ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+ ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+ ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+ ; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+ ; MIPS32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1)
+ ; MIPS32-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8)
+ ; MIPS32-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+ ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3)
+ ; MIPS32-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LOAD]], [[COPY]]
+ ; MIPS32-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[COPY]]
+ ; MIPS32-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LOAD1]], [[COPY1]]
+ ; MIPS32-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), [[LOAD1]]
+ ; MIPS32-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]]
+ ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; MIPS32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
+ ; MIPS32-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]]
+ ; MIPS32-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LOAD2]], [[COPY2]]
+ ; MIPS32-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB3]](s32), [[LOAD2]]
+ ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; MIPS32-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C1]]
+ ; MIPS32-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND1]]
+ ; MIPS32-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB3]](s32), [[C]]
+ ; MIPS32-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[OR]]
+ ; MIPS32-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ICMP3]], [[AND2]]
+ ; MIPS32-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[LOAD3]], [[COPY3]]
+ ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C1]]
+ ; MIPS32-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]]
+ ; MIPS32-NEXT: $v0 = COPY [[SUB]](s32)
+ ; MIPS32-NEXT: $v1 = COPY [[SUB2]](s32)
+ ; MIPS32-NEXT: $a0 = COPY [[SUB4]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[SUB6]](s32)
+ ; MIPS32-NEXT: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1
 %2:_(s32) = COPY $a0
 %3:_(s32) = COPY $a1
 %4:_(s32) = COPY $a2
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
@@ -89,7 +89,6 @@
 ; MIPS32-NEXT: addu $2, $6, $4
 ; MIPS32-NEXT: sltu $3, $2, $4
 ; MIPS32-NEXT: addu $1, $7, $5
-; MIPS32-NEXT: andi $3, $3, 1
 ; MIPS32-NEXT: addu $3, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
@@ -115,8 +114,7 @@
 ; MIPS32-NEXT: sltu $9, $2, $8
 ; MIPS32-NEXT: addu $3, $4, $3
 ; MIPS32-NEXT: sltu $4, $3, $4
-; MIPS32-NEXT: andi $8, $9, 1
-; MIPS32-NEXT: addu $3, $3, $8
+; MIPS32-NEXT: addu $3, $3, $9
 ; MIPS32-NEXT: sltiu $8, $3, 1
 ; MIPS32-NEXT: and $8, $8, $9
 ; MIPS32-NEXT: or $8, $4, $8
@@ -178,7 +176,6 @@
 ; MIPS32-NEXT: addu $1, $4, $5
 ; MIPS32-NEXT: sltu $2, $1, $5
 ; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: andi $2, $2, 1
 ; MIPS32-NEXT: sb $2, 0($7)
 ; MIPS32-NEXT: sw $1, 0($6)
 ; MIPS32-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
@@ -306,8 +306,7 @@
 define i16 @shl_i16(i16 %a) {
 ; MIPS32-LABEL: shl_i16:
 ; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: ori $1, $zero, 2
-; MIPS32-NEXT: sllv $2, $4, $1
+; MIPS32-NEXT: sll $2, $4, 2
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -318,10 +317,9 @@
 define i8 @ashr_i8(i8 %a) {
 ; MIPS32-LABEL: ashr_i8:
 ; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: ori $2, $zero, 2
 ; MIPS32-NEXT: sll $1, $4, 24
 ; MIPS32-NEXT: sra $1, $1, 24
-; MIPS32-NEXT: srav $2, $1, $2
+; MIPS32-NEXT: sra $2, $1, 2
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -332,9 +330,8 @@
 define i16 @lshr_i16(i16 %a) {
 ; MIPS32-LABEL: lshr_i16:
 ; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: ori $2, $zero, 2
 ; MIPS32-NEXT: andi $1, $4, 65535
-; MIPS32-NEXT: srlv $2, $1, $2
+; MIPS32-NEXT: srl $2, $1, 2
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -346,24 +343,19 @@
 ; MIPS32-LABEL: shl_i64:
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: move $3, $4
-; MIPS32-NEXT: move $9, $6
-; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $9, $1
-; MIPS32-NEXT: subu $4, $1, $9
+; MIPS32-NEXT: ori $4, $zero, 32
+; MIPS32-NEXT: subu $8, $6, $4
+; MIPS32-NEXT: subu $1, $4, $6
 ; MIPS32-NEXT: ori $2, $zero, 0
-; MIPS32-NEXT: sltu $6, $9, $1
-; MIPS32-NEXT: sltiu $1, $9, 1
-; MIPS32-NEXT: sllv $7, $3, $9
-; MIPS32-NEXT: srlv $4, $3, $4
-; MIPS32-NEXT: sllv $9, $5, $9
-; MIPS32-NEXT: or $4, $4, $9
+; MIPS32-NEXT: sltu $4, $6, $4
+; MIPS32-NEXT: sllv $7, $3, $6
+; MIPS32-NEXT: srlv $1, $3, $1
+; MIPS32-NEXT: sllv $9, $5, $6
+; MIPS32-NEXT: or $1, $1, $9
 ; MIPS32-NEXT: sllv $3, $3, $8
-; MIPS32-NEXT: andi $8, $6, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $3, $4, $6
-; MIPS32-NEXT: andi $1, $1, 1
-; MIPS32-NEXT: movn $3, $5, $1
+; MIPS32-NEXT: movn $2, $7, $4
+; MIPS32-NEXT: movn $3, $1, $4
+; MIPS32-NEXT: movz $3, $5, $6
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -379,23 +371,18 @@
 ; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT: move $2, $5
 ; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $3, $6
 ; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $3, $1
-; MIPS32-NEXT: subu $7, $1, $3
-; MIPS32-NEXT: sltu $4, $3, $1
-; MIPS32-NEXT: sltiu $6, $3, 1
-; MIPS32-NEXT: srav $1, $2, $3
-; MIPS32-NEXT: srlv $3, $5, $3
+; MIPS32-NEXT: subu $8, $6, $1
+; MIPS32-NEXT: subu $7, $1, $6
+; MIPS32-NEXT: sltu $4, $6, $1
+; MIPS32-NEXT: srav $1, $2, $6
+; MIPS32-NEXT: srlv $3, $5, $6
 ; MIPS32-NEXT: sllv $7, $2, $7
 ; MIPS32-NEXT: or $7, $3, $7
 ; MIPS32-NEXT: sra $3, $2, 31
 ; MIPS32-NEXT: srav $2, $2, $8
-; MIPS32-NEXT: andi $8, $4, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $2, $5, $6
-; MIPS32-NEXT: andi $4, $4, 1
+; MIPS32-NEXT: movn $2, $7, $4
+; MIPS32-NEXT: movz $2, $5, $6
 ; MIPS32-NEXT: movn $3, $1, $4
 ; MIPS32-NEXT: addiu $sp, $sp, 8
 ; MIPS32-NEXT: jr $ra
@@ -413,23 +400,18 @@
 ; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT: move $2, $5
 ; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT: move $7, $6
 ; MIPS32-NEXT: ori $1, $zero, 32
-; MIPS32-NEXT: subu $8, $7, $1
-; MIPS32-NEXT: subu $9, $1, $7
+; MIPS32-NEXT: subu $8, $6, $1
+; MIPS32-NEXT: subu $9, $1, $6
 ; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltu $4, $7, $1
-; MIPS32-NEXT: sltiu $6, $7, 1
-; MIPS32-NEXT: srlv $1, $2, $7
-; MIPS32-NEXT: srlv $7, $5, $7
+; MIPS32-NEXT: sltu $4, $6, $1
+; MIPS32-NEXT: srlv $1, $2, $6
+; MIPS32-NEXT: srlv $7, $5, $6
 ; MIPS32-NEXT: sllv $9, $2, $9
 ; MIPS32-NEXT: or $7, $7, $9
 ; MIPS32-NEXT: srlv $2, $2, $8
-; MIPS32-NEXT: andi $8, $4, 1
-; MIPS32-NEXT: movn $2, $7, $8
-; MIPS32-NEXT: andi $6, $6, 1
-; MIPS32-NEXT: movn $2, $5, $6
-; MIPS32-NEXT: andi $4, $4, 1
+; MIPS32-NEXT: movn $2, $7, $4
+; MIPS32-NEXT: movz $2, $5, $6
 ; MIPS32-NEXT: movn $3, $1, $4
 ; MIPS32-NEXT: addiu $sp, $sp, 8
 ; MIPS32-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
@@ -17,14 +17,11 @@
 define i64 @ctlz_i64(i64 %a) {
 ; MIPS32-LABEL: ctlz_i64:
 ; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $1, $4
 ; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltiu $4, $5, 1
-; MIPS32-NEXT: clz $1, $1
+; MIPS32-NEXT: clz $1, $4
 ; MIPS32-NEXT: addiu $1, $1, 32
 ; MIPS32-NEXT: clz $2, $5
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
+; MIPS32-NEXT: movz $2, $1, $5
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
@@ -21,9 +21,7 @@
 define i64 @cttz_i64(i64 %a) {
 ; MIPS32-LABEL: cttz_i64:
 ; MIPS32: # %bb.0: # %entry
-; MIPS32-NEXT: move $6, $4
 ; MIPS32-NEXT: ori $3, $zero, 0
-; MIPS32-NEXT: sltiu $4, $6, 1
 ; MIPS32-NEXT: not $1, $5
 ; MIPS32-NEXT: addiu $2, $5, -1
 ; MIPS32-NEXT: and $1, $1, $2
@@ -31,13 +29,12 @@
 ; MIPS32-NEXT: clz $1, $1
 ; MIPS32-NEXT: subu $1, $2, $1
 ; MIPS32-NEXT: addiu $1, $1, 32
-; MIPS32-NEXT: not $5, $6
-; MIPS32-NEXT: addiu $6, $6, -1
+; MIPS32-NEXT: not $5, $4
+; MIPS32-NEXT: addiu $6, $4, -1
 ; MIPS32-NEXT: and $5, $5, $6
 ; MIPS32-NEXT: clz $5, $5
 ; MIPS32-NEXT: subu $2, $2, $5
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
+; MIPS32-NEXT: movz $2, $1, $4
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -58,9 +55,7 @@
 ; MIPS32-NEXT: clz $3, $3
 ; MIPS32-NEXT: subu $2, $2, $3
 ; MIPS32-NEXT: addiu $2, $2, 1
-; MIPS32-NEXT: sltiu $3, $4, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: movz $2, $1, $4
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -76,7 +71,6 @@
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: ori $3, $zero, 1
 ; MIPS32-NEXT: ori $1, $zero, 0
-; MIPS32-NEXT: sltiu $7, $4, 1
 ; MIPS32-NEXT: not $2, $5
 ; MIPS32-NEXT: addiu $6, $5, -1
 ; MIPS32-NEXT: and $6, $2, $6
@@ -84,25 +78,21 @@
 ; MIPS32-NEXT: clz $6, $6
 ; MIPS32-NEXT: subu $6, $2, $6
 ; MIPS32-NEXT: addiu $6, $6, 32
-; MIPS32-NEXT: not $8, $4
-; MIPS32-NEXT: addiu $9, $4, -1
-; MIPS32-NEXT: and $8, $8, $9
-; MIPS32-NEXT: clz $8, $8
-; MIPS32-NEXT: subu $2, $2, $8
-; MIPS32-NEXT: andi $7, $7, 1
-; MIPS32-NEXT: movn $2, $6, $7
+; MIPS32-NEXT: not $7, $4
+; MIPS32-NEXT: addiu $8, $4, -1
+; MIPS32-NEXT: and $7, $7, $8
+; MIPS32-NEXT: clz $7, $7
+; MIPS32-NEXT: subu $2, $2, $7
+; MIPS32-NEXT: movz $2, $6, $4
 ; MIPS32-NEXT: addiu $2, $2, 1
 ; MIPS32-NEXT: sltu $6, $2, $3
 ; MIPS32-NEXT: addiu $3, $1, 0
-; MIPS32-NEXT: andi $6, $6, 1
 ; MIPS32-NEXT: addu $3, $3, $6
 ; MIPS32-NEXT: xori $4, $4, 0
 ; MIPS32-NEXT: xori $5, $5, 0
 ; MIPS32-NEXT: or $4, $4, $5
-; MIPS32-NEXT: sltiu $4, $4, 1
-; MIPS32-NEXT: andi $4, $4, 1
-; MIPS32-NEXT: movn $2, $1, $4
-; MIPS32-NEXT: movn $3, $1, $4
+; MIPS32-NEXT: movz $2, $1, $4
+; MIPS32-NEXT: movz $3, $1, $4
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
@@ -151,7 +151,6 @@
 ; MIPS32-NEXT: addiu $3, $zero, 1
 ; MIPS32-NEXT: c.ult.s $f12, $f0
 ; MIPS32-NEXT: movf $3, $zero, $fcc0
-; MIPS32-NEXT: andi $3, $3, 1
 ; MIPS32-NEXT: movn $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
@@ -175,7 +174,6 @@
 ; MIPS32-NEXT: addiu $3, $zero, 1
 ; MIPS32-NEXT: c.ult.s $f12, $f0
 ; MIPS32-NEXT: movf $3, $zero, $fcc0
-; MIPS32-NEXT: andi $3, $3, 1
 ; MIPS32-NEXT: movn $1, $2, $3
 ; MIPS32-NEXT: andi $2, $1, 65535
 ; MIPS32-NEXT: jr $ra
@@ -200,7 +198,6 @@
 ; MIPS32-NEXT: addiu $3, $zero, 1
 ; MIPS32-NEXT: c.ult.s $f12, $f0
 ; MIPS32-NEXT: movf $3, $zero, $fcc0
-; MIPS32-NEXT: andi $3, $3, 1
 ; MIPS32-NEXT: movn $1, $2, $3
 ; MIPS32-NEXT: andi $2, $1, 255
 ; MIPS32-NEXT: jr $ra
@@ -245,7 +242,6 @@
 ; FP32-NEXT: addiu $3, $zero, 1
 ; FP32-NEXT: c.ult.d $f12, $f0
 ; FP32-NEXT: movf $3, $zero, $fcc0
-; FP32-NEXT: andi $3, $3, 1
 ; FP32-NEXT: movn $2, $1, $3
 ; FP32-NEXT: jr $ra
 ; FP32-NEXT: nop
@@ -266,7 +262,6 @@
 ; FP64-NEXT: addiu $3, $zero, 1
 ; FP64-NEXT: c.ult.d $f12, $f0
 ; FP64-NEXT: movf $3, $zero, $fcc0
-; FP64-NEXT: andi $3, $3, 1
 ; FP64-NEXT: movn $2, $1, $3
 ; FP64-NEXT: jr $ra
 ; FP64-NEXT: nop
@@ -292,7 +287,6 @@
 ; FP32-NEXT: addiu $3, $zero, 1
 ; FP32-NEXT: c.ult.d $f12, $f0
 ; FP32-NEXT: movf $3, $zero, $fcc0
-; FP32-NEXT: andi $3, $3, 1
 ; FP32-NEXT: movn $1, $2, $3
 ; FP32-NEXT: andi $2, $1, 65535
 ; FP32-NEXT: jr $ra
@@ -314,7 +308,6 @@
 ; FP64-NEXT: addiu $3, $zero, 1
 ; FP64-NEXT: c.ult.d $f12, $f0
 ; FP64-NEXT: movf $3, $zero, $fcc0
-; FP64-NEXT: andi $3, $3, 1
 ; FP64-NEXT: movn $1, $2, $3
 ; FP64-NEXT: andi $2, $1, 65535
 ; FP64-NEXT: jr $ra
@@ -341,7 +334,6 @@
 ; FP32-NEXT: addiu $3, $zero, 1
 ; FP32-NEXT: c.ult.d $f12, $f0
 ; FP32-NEXT: movf $3, $zero, $fcc0
-; FP32-NEXT: andi $3, $3, 1
 ; FP32-NEXT: movn $1, $2, $3
 ; FP32-NEXT: andi $2, $1, 255
 ; FP32-NEXT: jr $ra
@@ -363,7 +355,6 @@
 ; FP64-NEXT: addiu $3, $zero, 1
 ; FP64-NEXT: c.ult.d $f12, $f0
 ; FP64-NEXT: movf $3, $zero, $fcc0
-; FP64-NEXT: andi $3, $3, 1
 ; FP64-NEXT: movn $1, $2, $3
 ; FP64-NEXT: andi $2, $1, 255
 ; FP64-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
@@ -189,11 +189,9 @@
 ; MIPS32-LABEL: sgt_i64:
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: slt $2, $7, $5
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $6, $4
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -206,12 +204,10 @@
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: slt $1, $5, $7
 ; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $4, $6
 ; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -223,11 +219,9 @@
 ; MIPS32-LABEL: slt_i64:
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: slt $2, $5, $7
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $4, $6
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -240,12 +234,10 @@
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: slt $1, $7, $5
 ; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $6, $4
 ; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -257,11 +249,9 @@
 ; MIPS32-LABEL: ugt_i64:
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: sltu $2, $7, $5
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $6, $4
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -274,12 +264,10 @@
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: sltu $1, $5, $7
 ; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $4, $6
 ; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -291,11 +279,9 @@
 ; MIPS32-LABEL: ult_i64:
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: sltu $2, $5, $7
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $4, $6
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
@@ -308,12 +294,10 @@
 ; MIPS32: # %bb.0: # %entry
 ; MIPS32-NEXT: sltu $1, $7, $5
 ; MIPS32-NEXT: xori $2, $1, 1
-; MIPS32-NEXT: xor $1, $5, $7
-; MIPS32-NEXT: sltiu $3, $1, 1
 ; MIPS32-NEXT: sltu $1, $6, $4
 ; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $3, $3, 1
-; MIPS32-NEXT: movn $2, $1, $3
+; MIPS32-NEXT: xor $3, $5, $7
+; MIPS32-NEXT: movz $2, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
 entry:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
@@ -23,7 +23,6 @@
 ; MIPS32-NEXT: subu $2, $4, $2
 ; MIPS32-NEXT: sw $2, 28($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT: sltu $1, $1, $2
-; MIPS32-NEXT: andi $1, $1, 1
 ; MIPS32-NEXT: bnez $1, $BB0_6
 ; MIPS32-NEXT: nop
 ; MIPS32-NEXT: $BB0_1: # %entry
@@ -64,7 +63,6 @@
 ; MIPS32-NEXT: subu $2, $2, $3
 ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT: sltu $1, $1, $2
-; MIPS32-NEXT: andi $1, $1, 1
 ; MIPS32-NEXT: bnez $1, $BB0_13
 ; MIPS32-NEXT: nop
 ; MIPS32-NEXT: $BB0_8: # %sw.epilog
@@ -125,7 +123,6 @@
 ; MIPS32_PIC-NEXT: subu $2, $4, $2
 ; MIPS32_PIC-NEXT: sw $2, 36($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT: sltu $1, $1, $2
-; MIPS32_PIC-NEXT: andi $1, $1, 1
 ; MIPS32_PIC-NEXT: bnez $1, $BB0_6
 ; MIPS32_PIC-NEXT: nop
 ; MIPS32_PIC-NEXT: $BB0_1: # %entry
@@ -168,7 +165,6 @@
 ; MIPS32_PIC-NEXT: subu $2, $2, $3
 ; MIPS32_PIC-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT: sltu $1, $1, $2
-; MIPS32_PIC-NEXT: andi $1, $1, 1
 ; MIPS32_PIC-NEXT: bnez $1, $BB0_13
 ; MIPS32_PIC-NEXT: nop
 ; MIPS32_PIC-NEXT: $BB0_8: # %sw.epilog
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -123,10 +123,8 @@
 ; MIPS32-NEXT: mfhi $5
 ; MIPS32-NEXT: addu $3, $3, $4
 ; MIPS32-NEXT: sltu $4, $3, $4
-; MIPS32-NEXT: andi $4, $4, 1
 ; MIPS32-NEXT: addu $3, $3, $5
 ; MIPS32-NEXT: sltu $5, $3, $5
-; MIPS32-NEXT: andi $5, $5, 1
 ; MIPS32-NEXT: addu $10, $4, $5
 ; MIPS32-NEXT: mul $4, $8, $14
 ; MIPS32-NEXT: mul $5, $7, $13
@@ -137,22 +135,17 @@
 ; MIPS32-NEXT: mfhi $11
 ; MIPS32-NEXT: addu $4, $4, $5
 ; MIPS32-NEXT: sltu $5, $4, $5
-; MIPS32-NEXT: andi $5, $5, 1
 ; MIPS32-NEXT: addu $4, $4, $24
 ; MIPS32-NEXT: sltu $24, $4, $24
-; MIPS32-NEXT: andi $24, $24, 1
 ; MIPS32-NEXT: addu $5, $5, $24
 ; MIPS32-NEXT: addu $4, $4, $15
 ; MIPS32-NEXT: sltu $15, $4, $15
-; MIPS32-NEXT: andi $15, $15, 1
 ; MIPS32-NEXT: addu $5, $5, $15
 ; MIPS32-NEXT: addu $4, $4, $11
 ; MIPS32-NEXT: sltu $11, $4, $11
-; MIPS32-NEXT: andi $11, $11, 1
 ; MIPS32-NEXT: addu $5, $5, $11
 ; MIPS32-NEXT: addu $4, $4, $10
 ; MIPS32-NEXT: sltu $10, $4, $10
-; MIPS32-NEXT: andi $10, $10, 1
 ; MIPS32-NEXT: addu $5, $5, $10
 ; MIPS32-NEXT: mul $1, $1, $14
 ; MIPS32-NEXT: mul $11, $8, $13
@@ -187,7 +180,6 @@
 ; MIPS32-NEXT: mul $1, $4, $5
 ; MIPS32-NEXT: sltu $2, $zero, $2
 ; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: andi $2, $2, 1
 ; MIPS32-NEXT: sb $2, 0($7)
 ; MIPS32-NEXT: sw $1, 0($6)
 ; MIPS32-NEXT: jr $ra
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
@@ -59,7 +59,6 @@
 ; MIPS32-NEXT: move $2, $7
 ; MIPS32-NEXT: slt $1, $4, $5
 ; MIPS32-NEXT: xori $1, $1, 1
-; MIPS32-NEXT: andi $1, $1, 1
 ; MIPS32-NEXT: movn $2, $6, $1
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
@@ -173,10 +173,9 @@
 define float @u16tof32(i16 zeroext %a) {
 ; FP32-LABEL: u16tof32:
 ; FP32: # %bb.0: # %entry
-; FP32-NEXT: andi $1, $4, 65535
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $2, $f1
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: mtc1 $4, $f0
+; FP32-NEXT: mtc1 $1, $f1
 ; FP32-NEXT: lui $2, 17200
 ; FP32-NEXT: ori $1, $zero, 0
 ; FP32-NEXT: mtc1 $1, $f2
@@ -188,10 +187,9 @@
 ;
 ; FP64-LABEL: u16tof32:
 ; FP64: # %bb.0: # %entry
-; FP64-NEXT: andi $1, $4, 65535
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $2, $f0
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: mtc1 $4, $f0
+; FP64-NEXT: mthc1 $1, $f0
 ; FP64-NEXT: lui $2, 17200
 ; FP64-NEXT: ori $1, $zero, 0
 ; FP64-NEXT: mtc1 $1, $f1
@@ -208,10 +206,9 @@
 define float @u8tof32(i8 zeroext %a) {
 ; FP32-LABEL: u8tof32:
 ; FP32: # %bb.0: # %entry
-; FP32-NEXT: andi $1, $4, 255
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $2, $f1
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: mtc1 $4, $f0
+; FP32-NEXT: mtc1 $1, $f1
 ; FP32-NEXT: lui $2, 17200
 ; FP32-NEXT: ori $1, $zero, 0
 ; FP32-NEXT: mtc1 $1, $f2
@@ -223,10 +220,9 @@
 ;
 ; FP64-LABEL: u8tof32:
 ; FP64: # %bb.0: # %entry
-; FP64-NEXT: andi $1, $4, 255
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $2, $f0
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: mtc1 $4, $f0
+; FP64-NEXT: mthc1 $1, $f0
 ; FP64-NEXT: lui $2, 17200
 ; FP64-NEXT: ori $1, $zero, 0
 ; FP64-NEXT: mtc1 $1, $f1
@@ -292,10 +288,9 @@
 define double @u16tof64(i16 zeroext %a) {
 ; FP32-LABEL: u16tof64:
 ; FP32: # %bb.0: # %entry
-; FP32-NEXT: andi $1, $4, 65535
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $2, $f1
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: mtc1 $4, $f0
+; FP32-NEXT: mtc1 $1, $f1
 ; FP32-NEXT: lui $2, 17200
 ; FP32-NEXT: ori $1, $zero, 0
 ; FP32-NEXT: mtc1 $1, $f2
@@ -306,10 +301,9 @@
 ;
 ; FP64-LABEL: u16tof64:
 ; FP64: # %bb.0: # %entry
-; FP64-NEXT: andi $1, $4, 65535
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $2, $f0
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: mtc1 $4, $f0
+; FP64-NEXT: mthc1 $1, $f0
 ; FP64-NEXT: lui $2, 17200
 ; FP64-NEXT: ori $1, $zero, 0
 ; FP64-NEXT: mtc1 $1, $f1
@@ -325,10 +319,9 @@
 define double @u8tof64(i8 zeroext %a) {
 ; FP32-LABEL: u8tof64:
 ; FP32: # %bb.0: # %entry
-; FP32-NEXT: andi $1, $4, 255
-; FP32-NEXT: lui $2, 17200
-; FP32-NEXT: mtc1 $1, $f0
-; FP32-NEXT: mtc1 $2, $f1
+; FP32-NEXT: lui $1, 17200
+; FP32-NEXT: mtc1 $4, $f0
+; FP32-NEXT: mtc1 $1, $f1
 ; FP32-NEXT: lui $2, 17200
 ; FP32-NEXT: ori $1, $zero, 0
 ; FP32-NEXT: mtc1 $1, $f2
@@ -339,10 +332,9 @@
 ;
 ; FP64-LABEL: u8tof64:
 ; FP64: # %bb.0: # %entry
-; FP64-NEXT: andi $1, $4, 255
-; FP64-NEXT: lui $2, 17200
-; FP64-NEXT: mtc1 $1, $f0
-; FP64-NEXT: mthc1 $2, $f0
+; FP64-NEXT: lui $1, 17200
+; FP64-NEXT: mtc1 $4, $f0
+; FP64-NEXT: mthc1 $1, $f0
 ; FP64-NEXT: lui $2, 17200
 ; FP64-NEXT: ori $1, $zero, 0
 ; FP64-NEXT: mtc1 $1, $f1
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
@@ -90,7 +90,6 @@
 ; MIPS32-NEXT: subu $2, $6, $4
 ; MIPS32-NEXT: sltu $3, $6, $4
 ; MIPS32-NEXT: subu $1, $7, $5
-; MIPS32-NEXT: andi $3, $3, 1
 ; MIPS32-NEXT: subu $3, $1, $3
 ; MIPS32-NEXT: jr $ra
 ; MIPS32-NEXT: nop
@@ -115,8 +114,7 @@
 ; MIPS32-NEXT: sltu $9, $8, $9
 ; MIPS32-NEXT: subu $8, $3, $5
 ; MIPS32-NEXT: sltu $5, $3, $8
-; MIPS32-NEXT: andi $3, $9, 1
-; MIPS32-NEXT: subu $3, $8, $3
+; MIPS32-NEXT: subu $3, $8, $9
 ; MIPS32-NEXT: sltiu $8, $8, 1
 ; MIPS32-NEXT: and $8, $8, $9
 ; MIPS32-NEXT: or $8, $5, $8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-add.mir
@@ -90,9 +90,7 @@
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %xlo, %ylo
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %ylo
 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %xhi, %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[ADD]](s32)
 ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -124,9 +122,7 @@
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %lo1, %lo2
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %lo2
 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %hi1, %hi2
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[ADD]](s32)
 ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -158,17 +154,13 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), %lo2
 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD %mid1, %mid2
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), %mid1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[AND]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ICMP]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ADD2]](s32), [[C]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]]
 ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD %hi1, %hi2
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C2]]
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[AND2]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[OR]]
 ; CHECK-NEXT: $x10 = COPY [[ADD]](s32)
 ; CHECK-NEXT: $x11 = COPY [[ADD2]](s32)
 ; CHECK-NEXT: $x12 = COPY [[ADD4]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-ashr.mir
@@ -117,15 +117,9 @@
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
 ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C3]](s32)
 ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB]](s32)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR3]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %xlo, [[SELECT]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C6]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR1]], [[ASHR2]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR3]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %xlo, [[SELECT]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR1]], [[ASHR2]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -166,15 +160,9 @@
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR %hi1, [[C2]](s32)
 ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR %hi1, [[SUB]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %lo1, [[SELECT]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -218,95 +206,63 @@
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB3]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB2]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[ASHR1]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C6]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %hi1, [[SELECT]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C7]]
- ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C7]], %lo2
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]]
- ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C8]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[ASHR1]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %hi1, [[SELECT]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C5]]
+ ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], %lo2
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C5]]
+ ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C6]]
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %mid1, %lo2(s32)
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %lo1, %lo2(s32)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL1]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32)
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR1]], [[LSHR3]]
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]]
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), %lo1, [[SELECT2]]
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C12]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR1]], [[C9]]
- ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C13]]
- ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[SUB1]]
- ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C13]]
- ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C14]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR3]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT2]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[LSHR1]], [[C7]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]]
+ ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]]
+ ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]]
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB1]](s32)
 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB7]](s32)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB1]](s32)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]]
- ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB6]](s32)
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C16]]
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL2]], [[C15]]
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C17]]
- ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[OR2]], [[SHL4]]
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C18]]
- ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[ASHR]], [[SELECT6]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL2]], [[C10]]
+ ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[OR2]], [[SHL4]]
+ ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[ASHR]], [[SELECT6]]
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SELECT3]], [[SELECT5]]
 ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT7]]
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C19]](s32)
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C20]]
- ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[SUB]]
- ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C20]]
- ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C21]]
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C11]](s32)
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C12]]
+ ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C12]], [[SUB]]
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C12]]
+ ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C13]]
 ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB]](s32)
 ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32)
 ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ASHR]], [[SUB9]](s32)
 ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL5]]
- ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
- ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C22]](s32)
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C14]](s32)
 ; CHECK-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[SUB8]](s32)
- ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C23]]
- ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[OR5]], [[ASHR5]]
- ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[C24]]
- ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), %hi1, [[SELECT8]]
- ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C25]]
- ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), [[ASHR3]], [[ASHR4]]
- ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C26]]
- ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[AND11]](s32), [[OR3]], [[SELECT9]]
- ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C27]]
- ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[AND12]](s32), [[OR4]], [[SELECT10]]
- ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C28]]
- ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[AND13]](s32), %lo1, [[SELECT11]]
- ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C29]]
- ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[AND14]](s32), %mid1, [[SELECT12]]
- ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C30]]
- ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[AND15]](s32), [[SELECT1]], [[ASHR2]]
+ ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[OR5]], [[ASHR5]]
+ ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s32), %hi1, [[SELECT8]]
+ ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[ASHR3]], [[ASHR4]]
+ ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR3]], [[SELECT9]]
+ ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR4]], [[SELECT10]]
+ ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT11]]
+ ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %mid1, [[SELECT12]]
+ ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT1]], [[ASHR2]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s32)
 ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-icmp.mir
@@ -115,9 +115,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -146,9 +144,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -277,9 +273,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -308,9 +302,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -439,9 +431,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -470,9 +460,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -601,9 +589,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR]](s32), [[ASHR1]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -632,9 +618,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -759,9 +743,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -790,9 +772,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -917,9 +897,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -948,9 +926,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -1075,9 +1051,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[AND]](s32), [[AND2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -1106,9 +1080,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -1233,9 +1205,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND1]](s32), [[AND3]]
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[AND]](s32), [[AND2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
@@ -1264,9 +1234,7 @@
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %xlo(s32), %ylo
 ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), %xhi(s32), %yhi
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10
 %xhi:_(s32) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-lshr.mir
@@ -114,15 +114,9 @@
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]]
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[SUB]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR]], [[LSHR2]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C6]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[AND]], [[SELECT]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C7]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR]], [[C4]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[AND]], [[SELECT]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C4]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -162,15 +156,9 @@
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %lo1, [[SELECT]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C5]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[LSHR]], [[C2]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C2]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -213,93 +201,61 @@
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB3]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB2]](s32)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C6]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), %hi1, [[SELECT]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C7]]
- ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C7]], %lo2
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]]
- ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C8]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[LSHR1]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %hi1, [[SELECT]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB %lo2, [[C5]]
+ ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], %lo2
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C5]]
+ ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %lo2(s32), [[C6]]
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %mid1, %lo2(s32)
 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %lo1, %lo2(s32)
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL1]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32)
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[OR1]], [[LSHR4]]
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]]
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), %lo1, [[SELECT2]]
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C12]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), [[LSHR2]], [[C9]]
- ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C13]]
- ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[SUB1]]
- ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C13]]
- ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C14]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR4]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT2]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[LSHR2]], [[C7]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]]
+ ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]]
+ ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]]
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB1]](s32)
 ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB7]](s32)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB1]](s32)
 ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL3]]
- ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, [[SUB6]](s32)
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C16]]
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL2]], [[C15]]
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C17]]
- ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[OR2]], [[SHL4]]
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C18]]
- ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[C]], [[SELECT6]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL2]], [[C10]]
+ ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[OR2]], [[SHL4]]
+ ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[C]], [[SELECT6]]
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SELECT3]], [[SELECT5]]
 ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT7]]
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C20]]
- ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[SUB]]
- ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C20]]
- ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C21]]
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C12]]
+ ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C12]], [[SUB]]
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C12]]
+ ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB]](s32), [[C13]]
 ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB]](s32)
 ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR %hi1, [[SUB]](s32)
 ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[SUB9]](s32)
 ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]]
- ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[C]], [[SUB8]](s32)
- ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C23]]
- ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[OR5]], [[LSHR8]]
- ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP9]], [[C24]]
- ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), %hi1, [[SELECT8]]
- ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP8]], [[C25]]
- ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), [[LSHR6]], [[C22]]
- ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C26]]
- ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[AND11]](s32), [[OR3]], [[SELECT9]]
- ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C27]]
- ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[AND12]](s32), [[OR4]], [[SELECT10]]
- ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C28]]
- ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[AND13]](s32), %lo1, [[SELECT11]]
- ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C29]]
- ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[AND14]](s32), %mid1, [[SELECT12]]
- ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C30]]
- ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[AND15]](s32), [[SELECT1]], [[C19]]
+ ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[OR5]], [[LSHR8]]
+ ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s32), %hi1, [[SELECT8]]
+ ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP8]](s32), [[LSHR6]], [[C14]]
+ ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR3]], [[SELECT9]]
+ ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR4]], [[SELECT10]]
+ ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %lo1, [[SELECT11]]
+ ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %mid1, [[SELECT12]]
+ ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT1]], [[C11]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s32)
 ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-mul-ext.mir
@@ -162,13 +162,9 @@
 ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH %lo1, %lo2
 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C1]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ICMP]], [[ICMP1]]
 ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL %hi1, %lo2
 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL %mid1, %mid2
 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL %lo1, %hi2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-shl.mir
@@ -104,15 +104,9 @@
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %xlo, [[SUB]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C2]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %xhi, [[SELECT1]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C2]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %xhi, [[SELECT1]]
 ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32)
 ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32)
 ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -152,15 +146,9 @@
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) =
G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %lo1, [[SUB]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %hi1, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %hi1, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -204,60 +192,38 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL %lo1, [[SUB2]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[SHL]], [[C4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), %mid1, [[SELECT1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C8]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C8]], [[SUB1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C8]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C9]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[SHL]], [[C4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), %mid1, [[SELECT1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C5]], [[SUB1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s32), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %lo1, [[SUB1]](s32) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL %mid1, [[SUB5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %mid1, [[SUB4]](s32) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[OR1]], [[LSHR2]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[AND4]](s32), %lo1, [[SELECT3]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C12]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s32), [[OR1]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), %lo1, [[SELECT3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo2(s32), [[C7]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL %hi1, %lo2(s32) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C14]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s32), [[SHL4]], [[C13]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s32), [[SHL4]], [[C8]] ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C17]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB]](s32), [[C11]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL %lo1, [[SUB]](s32) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP7]], [[C19]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND6]](s32), [[SHL5]], [[C18]] - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C20]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[AND7]](s32), [[SELECT]], [[C15]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C21]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[AND8]](s32), [[SELECT2]], [[C16]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C22]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[AND9]](s32), [[OR2]], [[SELECT6]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C23]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[AND10]](s32), %hi1, [[SELECT9]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[SHL5]], [[C12]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT]], [[C9]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[SELECT2]], [[C10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[OR2]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), %hi1, [[SELECT9]] ; CHECK-NEXT: $x10 = COPY [[SELECT7]](s32) ; CHECK-NEXT: $x11 = COPY [[SELECT8]](s32) ; CHECK-NEXT: $x12 = COPY [[SELECT10]](s32) diff --git 
a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-store.mir @@ -232,11 +232,9 @@ ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C6]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C5]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv32/legalize-sub.mir @@ -90,9 +90,7 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %xlo, %ylo ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %xlo(s32), %ylo ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %xhi, %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -124,9 +122,7 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo1(s32), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -158,17 +154,13 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), %lo1(s32), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[SUB1]](s32), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[ICMP]] + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ICMP1]], [[AND]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C2]] - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[AND2]] + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB3]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s32) ; CHECK-NEXT: $x12 = COPY [[SUB4]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-add.mir @@ -122,9 +122,7 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %x00, %y00 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %y00 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %x01, %y01 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -156,9 +154,7 @@ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -190,17 +186,13 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), %lo2 ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ADD2]](s64), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND]] ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C2]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[AND2]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[OR]] ; CHECK-NEXT: $x10 = COPY 
[[ADD]](s64) ; CHECK-NEXT: $x11 = COPY [[ADD2]](s64) ; CHECK-NEXT: $x12 = COPY [[ADD4]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-ashr.mir @@ -149,15 +149,9 @@ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB]](s64) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %x00, [[SELECT]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C6]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[ASHR1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[ASHR3]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %x00, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[ASHR1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -198,15 +192,9 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR %hi1, [[C2]](s64) ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR %hi1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[ASHR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[ASHR]], [[ASHR1]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -250,95 +238,63 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[ASHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[ASHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR %mid1, %lo2(s64) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %lo1, %lo2(s64) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR1]], [[LSHR3]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR1]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR3]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB1]](s64) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB7]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB1]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, 
[[SUB6]](s64) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[ASHR]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[ASHR]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C19]](s64) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C11]](s64) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C13]] ; CHECK-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB]](s64) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ASHR]], [[SUB9]](s64) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C22]](s64) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C14]](s64) ; CHECK-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[SUB8]](s64) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[OR5]], [[ASHR5]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), [[ASHR3]], [[ASHR4]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: 
[[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[AND11]](s64), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[AND12]](s64), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[AND13]](s64), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[AND14]](s64), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[AND15]](s64), [[SELECT1]], [[ASHR2]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[OR5]], [[ASHR5]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s64), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[ASHR3]], [[ASHR4]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT1]], [[ASHR2]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-icmp.mir @@ -137,9 +137,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -168,9 +166,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -321,9 +317,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: 
[[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -352,9 +346,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -505,9 +497,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -536,9 +526,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -689,9 +677,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sle), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ASHR]](s64), [[ASHR1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -720,9 +706,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sle), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND 
[[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -871,9 +855,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -902,9 +884,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1053,9 +1033,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1084,9 +1062,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1235,9 +1211,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: 
$x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1266,9 +1240,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(uge), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1417,9 +1389,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[AND1]](s64), [[AND3]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), [[AND]](s64), [[AND2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 @@ -1448,9 +1418,7 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %xlo(s64), %ylo ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(ule), %xhi(s64), %yhi - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[ICMP2]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %xhi:_(s64) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-lshr.mir @@ -146,15 +146,9 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[SUB]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), [[AND]], [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR]], [[C4]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[AND]], [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[LSHR]], [[C4]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY 
[[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -194,15 +188,9 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[LSHR2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %lo1, [[SELECT]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[LSHR]], [[C2]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[LSHR]], [[C2]] ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -245,93 +233,61 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[OR]], [[LSHR1]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), %hi1, [[SELECT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C7]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C7]], %lo2 - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C8]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[LSHR1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %hi1, [[SELECT]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB %lo2, [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C5]], %lo2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), %lo2(s64), [[C6]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %mid1, %lo2(s64) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR %lo1, %lo2(s64) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR3]], [[SHL1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), [[OR1]], 
[[LSHR4]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), %lo1, [[SELECT2]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C12]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), [[LSHR2]], [[C9]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C13]] - ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C13]], [[SUB1]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C13]] - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C14]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR4]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[LSHR2]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB6:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] + ; CHECK-NEXT: [[SUB7:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB1]](s64) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB7]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB1]](s64) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL3]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, [[SUB6]](s64) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C16]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL2]], [[C15]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[OR2]], [[SHL4]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[C]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL2]], [[C10]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[C]], [[SELECT6]] ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT3]], [[SELECT5]] ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT7]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C20]] - ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C20]], [[SUB]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C20]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C21]] + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 0 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB8:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C12]] + ; CHECK-NEXT: [[SUB9:%[0-9]+]]:_(s64) = G_SUB [[C12]], [[SUB]] + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C12]] + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB]](s64), [[C13]] ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB]](s64) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR %hi1, [[SUB]](s64) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[C]], [[SUB9]](s64) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR7]], [[SHL5]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[C]], [[SUB8]](s64) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C23]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[OR5]], [[LSHR8]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP9]], [[C24]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), %hi1, [[SELECT8]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ICMP8]], [[C25]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), [[LSHR6]], [[C22]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C26]] - ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[AND11]](s64), [[OR3]], [[SELECT9]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C27]] - ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[AND12]](s64), [[OR4]], [[SELECT10]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C28]] - ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[AND13]](s64), %lo1, [[SELECT11]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C29]] - ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[AND14]](s64), %mid1, [[SELECT12]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C30]] - ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[AND15]](s64), [[SELECT1]], [[C19]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[OR5]], [[LSHR8]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s64), %hi1, [[SELECT8]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s64), [[LSHR6]], [[C14]] + ; CHECK-NEXT: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR3]], [[SELECT9]] + ; CHECK-NEXT: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR4]], [[SELECT10]] + ; CHECK-NEXT: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %lo1, [[SELECT11]] + ; CHECK-NEXT: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %mid1, [[SELECT12]] + ; CHECK-NEXT: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT1]], [[C11]] ; CHECK-NEXT: $x10 = COPY [[SELECT13]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT14]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT15]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir 
b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-mul-ext.mir @@ -194,13 +194,9 @@ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH %lo1, %lo2 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[MUL1]], [[MUL2]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD]](s64), [[MUL2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[UMULH]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[ADD1]](s64), [[UMULH]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ICMP]], [[ICMP1]] ; CHECK-NEXT: [[MUL3:%[0-9]+]]:_(s64) = G_MUL %hi1, %lo2 ; CHECK-NEXT: [[MUL4:%[0-9]+]]:_(s64) = G_MUL %mid1, %mid2 ; CHECK-NEXT: [[MUL5:%[0-9]+]]:_(s64) = G_MUL %lo1, %hi2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-shl.mir @@ -136,15 +136,9 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %x00, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %x01, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %x01, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -184,15 +178,9 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C5]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %hi1, [[SELECT1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP]](s64), [[SHL]], [[C2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %hi1, [[SELECT1]] ; CHECK-NEXT: $x10 = COPY [[SELECT]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -236,60 +224,38 @@ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB2]](s64) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C5]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s64), [[SHL]], [[C4]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[C6]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s64), [[OR]], [[SHL2]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ICMP3]], [[C7]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s64), %mid1, [[SELECT1]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C8]] - ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C8]], [[SUB1]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C8]] - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C9]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[SHL]], [[C4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s64), [[OR]], [[SHL2]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s64), %mid1, [[SELECT1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C5]] + ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C5]], [[SUB1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB1]](s64), [[C5]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C6]] ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR %lo1, [[SUB1]](s64) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL %mid1, [[SUB5]](s64) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL3]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR %mid1, [[SUB4]](s64) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ICMP4]], [[C10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s64), [[OR1]], [[LSHR2]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ICMP5]], [[C11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s64), %lo1, [[SELECT3]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C12]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s64), [[OR1]], [[LSHR2]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s64), %lo1, [[SELECT3]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo2(s64), [[C7]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL %hi1, %lo2(s64) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: 
[[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ICMP6]], [[C14]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s64), [[SHL4]], [[C13]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s64), [[SHL4]], [[C8]] ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT5]] - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C17]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SUB]](s64), [[C11]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL %lo1, [[SUB]](s64) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[ICMP7]], [[C19]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s64), [[SHL5]], [[C18]] - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C20]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[AND7]](s64), [[SELECT]], [[C15]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C21]] - ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[AND8]](s64), [[SELECT2]], [[C16]] - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C22]] - ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[AND9]](s64), [[OR2]], [[SELECT6]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[ICMP1]], [[C23]] - ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[AND10]](s64), %hi1, [[SELECT9]] + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s64), [[SHL5]], [[C12]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT]], [[C9]] + ; CHECK-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[SELECT2]], [[C10]] + ; CHECK-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[OR2]], [[SELECT6]] + ; CHECK-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), %hi1, [[SELECT9]] ; CHECK-NEXT: $x10 = COPY [[SELECT7]](s64) ; CHECK-NEXT: $x11 = COPY [[SELECT8]](s64) ; CHECK-NEXT: $x12 = COPY [[SELECT10]](s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-store.mir @@ -263,13 +263,9 @@ ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s8)) ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C6]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC 
[[AND1]](s64) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC2]], [[C5]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C7]](s64) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2) ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3) ; CHECK-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rv64/legalize-sub.mir @@ -122,9 +122,7 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %x00, %y00 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %x00(s64), %y00 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %x01, %y01 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -156,9 +154,7 @@ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %lo1, %lo2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo1(s64), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY [[SUB2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 @@ -190,17 +186,13 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), %lo1(s64), %lo2 ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB %mid1, %mid2 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ugt), [[SUB1]](s64), %mid1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[AND]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[ICMP]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SUB1]](s64), [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ICMP1]], [[AND]] ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB %hi1, %hi2 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C2]] - ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB3]], [[AND2]] + ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB3]], [[OR]] ; CHECK-NEXT: $x10 = COPY [[SUB]](s64) ; CHECK-NEXT: $x11 = COPY 
[[SUB2]](s64) ; CHECK-NEXT: $x12 = COPY [[SUB4]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll --- a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll @@ -167,8 +167,7 @@ ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shlb $7, %al ; X64-NEXT: sarb $7, %al -; X64-NEXT: movb $1, %cl -; X64-NEXT: sarb %cl, %al +; X64-NEXT: sarb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-cmp.mir @@ -48,14 +48,16 @@ liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY $sil - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s8), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $sil + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s8), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s8) = COPY $sil %2(s1) = G_ICMP intpred(ult), %0(s8), %1 @@ -79,14 +81,16 @@ liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY $si - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s16), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $si + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s16), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s16) = COPY $si %2(s1) = G_ICMP intpred(ult), %0(s16), %1 @@ -110,14 +114,16 @@ liveins: $edi, $esi ; CHECK-LABEL: name: test_cmp_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $edi, $esi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $esi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s32) = COPY $edi %1(s32) = COPY $esi %2(s1) = G_ICMP intpred(ult), %0(s32), %1 @@ -141,14 +147,16 @@ liveins: $rdi, $rsi ; CHECK-LABEL: name: test_cmp_i64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $rdi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(s64) = COPY $rdi %1(s64) = COPY $rsi %2(s1) = G_ICMP intpred(ult), %0(s64), %1 @@ -172,14 +180,16 @@ liveins: $rdi, $rsi ; CHECK-LABEL: name: test_cmp_p0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi - ; CHECK: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](p0), [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s8) = G_ICMP intpred(ult), [[COPY]](p0), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %0(p0) = COPY $rdi %1(p0) = COPY $rsi %2(s1) = G_ICMP intpred(ult), %0(p0), %1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir @@ -76,14 +76,16 @@ liveins: $edi ; CHECK-LABEL: name: test_sext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 63 - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s8) - ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY [[C]](s8) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s8) - ; CHECK: $rax = COPY [[ASHR]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 63 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s8) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[C]](s8) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s8) + ; CHECK-NEXT: $rax = COPY [[ASHR]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_SEXT %1(s1) @@ -104,10 +106,12 @@ liveins: $edi ; CHECK-LABEL: name: test_sext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_SEXT %0(s8) $rax = COPY %1(s64) @@ -127,10 +131,12 @@ liveins: $edi ; CHECK-LABEL: name: test_sext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_SEXT %0(s16) $rax = COPY %1(s64) @@ -150,10 +156,12 @@ liveins: $edi ; CHECK-LABEL: name: test_sext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[SEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[SEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_SEXT %0(s32) $rax = COPY %1(s64) @@ -174,12 +182,14 @@ liveins: $edi ; CHECK-LABEL: name: test_zext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_ZEXT %1(s1) @@ -200,10 +210,12 @@ liveins: $edi ; CHECK-LABEL: name: test_zext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_ZEXT %0(s8) $rax = COPY %1(s64) @@ -223,10 +235,12 @@ liveins: $edi ; CHECK-LABEL: name: test_zext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) - ; CHECK: $rax = 
COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_ZEXT %0(s16) $rax = COPY %1(s64) @@ -246,10 +260,12 @@ liveins: $edi ; CHECK-LABEL: name: test_zext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[ZEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_ZEXT %0(s32) $rax = COPY %1(s64) @@ -270,10 +286,12 @@ liveins: $edi ; CHECK-LABEL: name: test_anyext_i1 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s64) = G_ANYEXT %1(s1) @@ -294,10 +312,12 @@ liveins: $edi ; CHECK-LABEL: name: test_anyext_i8 - ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s8) = COPY $dil %1(s64) = G_ANYEXT %0(s8) $rax = COPY %1(s64) @@ -317,10 +337,12 @@ liveins: $edi ; CHECK-LABEL: name: test_anyext_i16 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s16) = COPY $di %1(s64) = G_ANYEXT %0(s16) $rax = COPY %1(s64) @@ -340,10 +362,12 @@ liveins: $edi ; CHECK-LABEL: name: test_anyext_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: $rax = COPY [[ANYEXT]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: $rax = COPY [[ANYEXT]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %0(s32) = COPY $edi %1(s64) = G_ANYEXT %0(s32) $rax = COPY %1(s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ext.mir @@ -104,19 +104,24 @@ liveins: $edi ; X32-LABEL: name: test_zext_i1toi8 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT 
i8 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X32: $al = COPY [[AND]](s8) - ; X32: RET 0, implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: $al = COPY [[AND]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_zext_i1toi8 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X64: $al = COPY [[AND]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: $al = COPY [[AND]](s8) + ; X64-NEXT: RET 0, implicit $al %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s8) = G_ZEXT %0(s1) @@ -137,19 +142,24 @@ liveins: $edi ; X32-LABEL: name: test_zext_i1toi16 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X32: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; X32: $ax = COPY [[AND]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: $ax = COPY [[AND]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_zext_i1toi16 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X64: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; X64: $ax = COPY [[AND]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: $ax = COPY [[AND]](s16) + ; X64-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s1) @@ -171,19 +181,24 @@ liveins: $edi ; X32-LABEL: name: test_zext_i1 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; X32: $eax = COPY [[AND]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; X32-NEXT: $eax = COPY [[AND]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i1 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; X64: $eax = COPY [[AND]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; X64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; X64-NEXT: $eax = COPY [[AND]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s32) = G_ZEXT %1(s1) @@ -204,15 +219,20 @@ liveins: $edi ; X32-LABEL: name: test_zext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) - ; X32: $ax = COPY [[ZEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[ZEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_zext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) - ; X64: $ax = COPY [[ZEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[ZEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_ZEXT %0(s8) $ax = COPY %1(s16) @@ -232,15 +252,20 @@ liveins: $edi ; X32-LABEL: name: test_zext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) - ; X32: $eax = COPY [[ZEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ZEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) - ; X64: $eax = COPY [[ZEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ZEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_ZEXT %0(s8) $eax = COPY %1(s32) @@ -260,15 +285,20 @@ liveins: $edi ; X32-LABEL: name: test_zext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; X32: $eax = COPY [[ZEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[ZEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_zext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; X64: $eax = COPY [[ZEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; X64-NEXT: $eax = COPY [[ZEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_ZEXT %0(s16) $eax = COPY %1(s32) @@ -288,13 +318,18 @@ liveins: $edi ; X32-LABEL: name: test_sext_i1toi8 - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X32: $al = COPY [[C]](s8) - ; X32: RET 0, 
implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; X32-NEXT: $al = COPY [[C]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_sext_i1toi8 - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X64: $al = COPY [[C]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; X64-NEXT: $al = COPY [[C]](s8) + ; X64-NEXT: RET 0, implicit $al %0(s1) = G_IMPLICIT_DEF %1(s8) = G_SEXT %0(s1) $al = COPY %1(s8) @@ -314,13 +349,18 @@ liveins: $edi ; X32-LABEL: name: test_sext_i1toi16 - ; X32: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; X32: $ax = COPY [[C]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; X32-NEXT: $ax = COPY [[C]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_sext_i1toi16 - ; X64: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; X64: $ax = COPY [[C]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; X64-NEXT: $ax = COPY [[C]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s1) = G_IMPLICIT_DEF %1(s16) = G_SEXT %0(s1) $ax = COPY %1(s16) @@ -341,13 +381,18 @@ liveins: $edi ; X32-LABEL: name: test_sext_i1 - ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; X32: $eax = COPY [[C]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X32-NEXT: $eax = COPY [[C]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i1 - ; X64: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; X64: $eax = COPY [[C]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; X64-NEXT: $eax = COPY [[C]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s1) = G_IMPLICIT_DEF %2(s32) = G_SEXT %0(s1) $eax = COPY %2(s32) @@ -367,15 +412,20 @@ liveins: $edi ; X32-LABEL: name: test_sext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) - ; X32: $ax = COPY [[SEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[SEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_sext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) - ; X64: $ax = COPY [[SEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[SEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_SEXT %0(s8) $ax = COPY %1(s16) @@ -395,15 +445,20 @@ liveins: $edi ; X32-LABEL: name: test_sext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) - ; X32: $eax = COPY [[SEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[SEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[SEXT:%[0-9]+]]:_(s32) = 
G_SEXT [[COPY]](s8) - ; X64: $eax = COPY [[SEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[SEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_SEXT %0(s8) $eax = COPY %1(s32) @@ -423,15 +478,20 @@ liveins: $edi ; X32-LABEL: name: test_sext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; X32: $eax = COPY [[SEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[SEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_sext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; X64: $eax = COPY [[SEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; X64-NEXT: $eax = COPY [[SEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_SEXT %0(s16) $eax = COPY %1(s32) @@ -452,15 +512,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i1toi8 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X32: $al = COPY [[TRUNC]](s8) - ; X32: RET 0, implicit $al + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: $al = COPY [[TRUNC]](s8) + ; X32-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_anyext_i1toi8 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; X64: $al = COPY [[TRUNC]](s8) - ; X64: RET 0, implicit $al + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: $al = COPY [[TRUNC]](s8) + ; X64-NEXT: RET 0, implicit $al %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) %2(s8) = G_ANYEXT %1(s1) @@ -482,15 +547,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i1toi16 - ; X32: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X32: $ax = COPY [[TRUNC]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X32-NEXT: $ax = COPY [[TRUNC]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_anyext_i1toi16 - ; X64: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; X64: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; X64: $ax = COPY [[TRUNC]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; X64-NEXT: $ax = COPY [[TRUNC]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s32) = COPY $edi %1(s1) = G_TRUNC %0(s32) %2(s16) = G_ANYEXT %1(s1) @@ -512,15 +582,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i1 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: 
$edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i1 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: $eax = COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s1) = G_TRUNC %0(s8) %2(s32) = G_ANYEXT %1(s1) @@ -541,15 +616,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i8toi16 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; X32: $ax = COPY [[ANYEXT]](s16) - ; X32: RET 0, implicit $ax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $ax = COPY [[ANYEXT]](s16) + ; X32-NEXT: RET 0, implicit $ax + ; ; X64-LABEL: name: test_anyext_i8toi16 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; X64: $ax = COPY [[ANYEXT]](s16) - ; X64: RET 0, implicit $ax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $ax = COPY [[ANYEXT]](s16) + ; X64-NEXT: RET 0, implicit $ax %0(s8) = COPY $dil %1(s16) = G_ANYEXT %0(s8) $ax = COPY %1(s16) @@ -569,15 +649,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i8 - ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i8 - ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY $dil - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) - ; X64: $eax = COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $dil + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s8) + ; X64-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s8) = COPY $dil %1(s32) = G_ANYEXT %0(s8) $eax = COPY %1(s32) @@ -597,15 +682,20 @@ liveins: $edi ; X32-LABEL: name: test_anyext_i16 - ; X32: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; X32: $eax = COPY [[ANYEXT]](s32) - ; X32: RET 0, implicit $eax + ; X32: liveins: $edi + ; X32-NEXT: {{ $}} + ; X32-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; X32-NEXT: $eax = COPY [[ANYEXT]](s32) + ; X32-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_anyext_i16 - ; X64: [[COPY:%[0-9]+]]:_(s16) = COPY $di - ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; X64: $eax = COPY [[ANYEXT]](s32) - ; X64: RET 0, implicit $eax + ; X64: liveins: $edi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $di + ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; X64-NEXT: $eax 
= COPY [[ANYEXT]](s32) + ; X64-NEXT: RET 0, implicit $eax %0(s16) = COPY $di %1(s32) = G_ANYEXT %0(s16) $eax = COPY %1(s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros-undef.mir @@ -21,6 +21,7 @@ ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -34,8 +35,8 @@ ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] @@ -88,6 +89,7 @@ ; X64-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTTZ_ZERO_UNDEF [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTTZ_ZERO_UNDEF]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_cttz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) @@ -98,8 +100,8 @@ ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C1]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) - ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trailing-zeros.mir @@ -21,6 +21,7 @@ ; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738367 ; X64-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[CTTZ_ZERO_UNDEF]], [[C1]] ; X64-NEXT: RET 0, implicit [[AND]](s64) + ; ; X86-LABEL: name: test_cttz35 ; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -34,8 +35,8 @@ ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ_ZERO_UNDEF]], [[C2]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; 
X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF1]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] @@ -90,6 +91,7 @@ ; X64-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[DEF]](s64) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTTZ]](s64) ; X64-NEXT: RET 0, implicit [[COPY]](s64) + ; ; X86-LABEL: name: test_cttz64 ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) @@ -100,8 +102,8 @@ ; X86-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[CTTZ]], [[C1]] ; X86-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDO1]] ; X86-NEXT: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) - ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s8) + ; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; X86-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDO]], [[CTTZ_ZERO_UNDEF]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir @@ -18,28 +18,29 @@ bb.1 (%ir-block.0): ; X32-LABEL: name: trunc_check ; X32: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; X32: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X32: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) - ; X32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) - ; X32: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) - ; X32: RET 0 + ; X32-NEXT: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; X32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; X32-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X32-NEXT: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) + ; X32-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) + ; X32-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; X32-NEXT: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) + ; X32-NEXT: RET 0 + ; ; X64-LABEL: name: trunc_check ; X64: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; X64: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X64: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) - ; X64: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) - ; X64: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) - ; X64: RET 0 + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; 
X64-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; X64-NEXT: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) + ; X64-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) + ; X64-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; X64-NEXT: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) + ; X64-NEXT: RET 0 %0(s32) = IMPLICIT_DEF %1(s1) = G_TRUNC %0(s32) %4:_(p0) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll --- a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -165,8 +165,7 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: andb $1, %al -; X64-NEXT: movb $1, %cl -; X64-NEXT: shrb %cl, %al +; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-legalize-zext.mir @@ -68,12 +68,13 @@ ; CHECK-LABEL: name: zext_i1_to_i8 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; CHECK: $al = COPY [[AND]](s8) - ; CHECK: RET 0, implicit $al + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $al = COPY [[AND]](s8) + ; CHECK-NEXT: RET 0, implicit $al %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s8) = G_ZEXT %0(s1) @@ -95,12 +96,13 @@ ; CHECK-LABEL: name: zext_i1_to_i16 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: $ax = COPY [[AND]](s16) - ; CHECK: RET 0, implicit $ax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $ax = COPY [[AND]](s16) + ; CHECK-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s1) @@ -122,11 +124,12 @@ ; CHECK-LABEL: name: zext_i1_to_i32 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s1) @@ -148,12 +151,13 @@ ; CHECK-LABEL: name: zext_i1_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %1:_(s32) = COPY $edi %0:_(s1) = G_TRUNC %1(s32) %2:_(s64) = G_ZEXT %0(s1) @@ -175,12 +179,13 @@ ; CHECK-LABEL: name: zext_i8_to_i16 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: $ax = COPY [[AND]](s16) - ; CHECK: RET 0, implicit $ax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: $ax = COPY [[AND]](s16) + ; CHECK-NEXT: RET 0, implicit $ax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s16) = G_ZEXT %0(s8) @@ -202,11 +207,12 @@ ; CHECK-LABEL: name: zext_i8_to_i32 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s8) @@ -228,12 +234,13 @@ ; CHECK-LABEL: name: zext_i8_to_i64 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: $rax = COPY [[AND]](s64) - ; CHECK: RET 0, implicit $rax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: $rax = COPY [[AND]](s64) + ; CHECK-NEXT: RET 0, implicit $rax %1:_(s32) = COPY $edi %0:_(s8) = G_TRUNC %1(s32) %2:_(s64) = G_ZEXT %0(s8) @@ -255,11 +262,12 @@ ; CHECK-LABEL: name: zext_i16_to_i32 ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: $eax = COPY [[AND]](s32) - ; CHECK: RET 0, implicit $eax + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: $eax = COPY [[AND]](s32) + ; CHECK-NEXT: RET 0, implicit $eax %1:_(s32) = COPY $edi %0:_(s16) = G_TRUNC %1(s32) %2:_(s32) = G_ZEXT %0(s16) @@ -281,12 +289,13 @@ 
; CHECK-LABEL: name: zext_i16_to_i64
; CHECK: liveins: $edi
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
- ; CHECK: $rax = COPY [[AND]](s64)
- ; CHECK: RET 0, implicit $rax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+ ; CHECK-NEXT: $rax = COPY [[AND]](s64)
+ ; CHECK-NEXT: RET 0, implicit $rax
%1:_(s32) = COPY $edi
%0:_(s16) = G_TRUNC %1(s32)
%2:_(s64) = G_ZEXT %0(s16)
@@ -307,10 +316,11 @@
; CHECK-LABEL: name: zext_i32_to_i64
; CHECK: liveins: $edi
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
- ; CHECK: $rax = COPY [[ZEXT]](s64)
- ; CHECK: RET 0, implicit $rax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK-NEXT: $rax = COPY [[ZEXT]](s64)
+ ; CHECK-NEXT: RET 0, implicit $rax
%0:_(s32) = COPY $edi
%1:_(s64) = G_ZEXT %0(s32)
$rax = COPY %1(s64)
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
@@ -212,8 +212,8 @@
CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64)
CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load (s8) from unknown-address + 1)
- CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
CHECK-NEXT: [[V0_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_0]]:_(s16)
+ CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
CHECK-NEXT: %v0_zext:_(s32) = G_AND [[V0_EXT]]:_, [[FF_MASK]]:_
CHECK-NEXT: [[V1_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_1]]:_(s16)
CHECK-NEXT: [[SHAMNT:%[0-9]+]]:_(s32) = G_CONSTANT i32 24