diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -118,11 +118,7 @@
   GISelKnownBitsAnalysis() : MachineFunctionPass(ID) {
     initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry());
   }
-  GISelKnownBits &get(MachineFunction &MF) {
-    if (!Info)
-      Info = std::make_unique<GISelKnownBits>(MF);
-    return *Info.get();
-  }
+  GISelKnownBits &get(MachineFunction &MF);
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnMachineFunction(MachineFunction &MF) override;
   void releaseMemory() override { Info.reset(); }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -35,6 +35,7 @@
   MachineIRBuilder &Builder;
   MachineRegisterInfo &MRI;
   const LegalizerInfo &LI;
+  GISelKnownBits *KB;
 
   static bool isArtifactCast(unsigned Opc) {
     switch (Opc) {
@@ -50,8 +51,9 @@
 public:
   LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
-                               const LegalizerInfo &LI)
-      : Builder(B), MRI(MRI), LI(LI) {}
+                               const LegalizerInfo &LI,
+                               GISelKnownBits *KB = nullptr)
+      : Builder(B), MRI(MRI), LI(LI), KB(KB) {}
 
   bool tryCombineAnyExt(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs,
@@ -131,13 +133,26 @@
       LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
       LLT SrcTy = MRI.getType(SrcReg);
       APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
-      auto Mask = Builder.buildConstant(
-          DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
       if (SextSrc && (DstTy != MRI.getType(SextSrc)))
         SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
       if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
         TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+      APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits());
+      Register AndSrc = SextSrc ? SextSrc : TruncSrc;
+      // Elide G_AND and mask constant if possible.
+      // The G_AND would also be removed by the post-legalize redundant_and
+      // combine, but in this very common case, eliding early and regardless of
+      // OptLevel results in significant compile-time and O0 code-size
+      // improvements. Inserting unnecessary instructions between a boolean def
+      // and its use can also hinder ISel from detecting, e.g., that reloading
+      // a flags register is unnecessary.
+      if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) {
+        replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs,
+                              Observer);
+      } else {
+        auto Mask = Builder.buildConstant(DstTy, ExtMaskVal);
+        Builder.buildAnd(DstReg, AndSrc, Mask);
+      }
       markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
       return true;
     }
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "gisel-known-bits"
 
@@ -773,3 +774,12 @@
 bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
+
+GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
+  if (!Info) {
+    unsigned MaxDepth =
+        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
+    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
+  }
+  return *Info.get();
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,7 +218,7 @@
   // This will keep all the observers notified about new insertions/deletions.
   RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
   LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
-  LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
+  LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI, KB);
   bool Changed = false;
   SmallVector<MachineInstr *, 128> RetryList;
   do {
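Note on the elision test above (an illustration, not part of the patch): the G_AND with mask ExtMaskVal is redundant exactly when every bit the mask would clear is already known to be zero in AndSrc, i.e. when (KnownZero | Mask) is all-ones. A minimal standalone sketch of that invariant in plain C++ follows; the helper name andIsRedundant is hypothetical, and the real code works on APInt via GISelKnownBits::getKnownZeroes rather than uint64_t.

#include <cassert>
#include <cstdint>

// Returns true when x & Mask == x for every value x whose known-zero bits
// include KnownZero, i.e. when the AND cannot change the value.
static bool andIsRedundant(uint64_t KnownZero, uint64_t Mask) {
  // The AND clears exactly the bits in ~Mask; it is a no-op iff all of
  // those bits are already known to be zero.
  return (KnownZero | Mask) == ~UINT64_C(0);
}

int main() {
  // A value produced by zero-extending an i1: bits [63:1] are known zero,
  // so masking with 0x1 is redundant and the G_AND can be elided.
  uint64_t KnownZeroOfZextI1 = ~UINT64_C(1);
  assert(andIsRedundant(KnownZeroOfZextI1, UINT64_C(1)));
  // With nothing known about the high bits, the mask must stay.
  assert(!andIsRedundant(UINT64_C(0), UINT64_C(1)));
  return 0;
}

At -O0 the analysis depth is capped at 2 in GISelKnownBitsAnalysis::get above, which is presumably enough to look through the short zext/trunc chains that feed this pattern while keeping O0 compile time flat. The removed "and w.., w.., #0x1" instructions in the test deltas below are this elision firing on i1 compare results.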
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: subs w10, w10, #1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
@@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and
w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: 
atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh 
w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: 
cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, 
w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6056,9 +5970,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6079,9 +5991,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6102,9 +6012,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -6120,9 +6028,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -6138,9 +6044,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -6156,9 +6060,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -6174,9 +6076,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -6191,9 +6091,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -6223,9 +6119,7 @@ define dso_local i32 
@atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -6271,9 +6161,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -6351,15 +6231,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: 
ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6482,9 +6342,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6505,9 +6363,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6528,9 +6384,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: 
ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6405,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6574,9 +6426,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6597,9 +6447,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6619,9 +6467,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6641,9 +6487,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6663,9 +6507,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6685,9 +6527,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6706,9 +6546,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6766,9 +6600,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; 
-O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6806,9 +6636,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6906,15 +6726,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6974,15 +6786,11 @@ define dso_local 
i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7008,15 +6816,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7042,15 +6846,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7077,9 +6877,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7100,9 +6898,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7123,9 +6919,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7146,9 +6940,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7169,9 +6961,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7192,9 +6982,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel 
w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -7210,9 +6998,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -7228,9 +7014,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -7246,9 +7030,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -7264,9 +7046,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -7281,9 +7061,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -7297,9 +7075,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -7313,9 +7089,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -7329,9 +7103,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -7345,9 +7117,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -7361,9 +7131,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; 
-O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -7377,9 +7145,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -7393,9 +7159,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -7409,9 +7173,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -7425,9 +7187,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -7441,15 +7201,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7467,15 +7223,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7493,15 +7245,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; 
-O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7519,15 +7267,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7545,15 +7289,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7572,9 +7312,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7595,9 +7333,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7618,9 +7354,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7641,9 +7375,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7664,9 +7396,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7686,9 +7416,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7707,9 +7435,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, 
#0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7728,9 +7454,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7749,9 +7473,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7770,9 +7492,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7791,9 +7511,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7811,9 +7529,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7831,9 +7547,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7851,9 +7565,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7871,9 +7583,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7891,9 +7601,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7911,9 +7619,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: 
atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9653,15 +9165,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9679,15 +9187,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9705,15 +9209,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -511,7 +511,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -532,7 +531,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -553,7 +551,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -574,7 +571,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -595,7 +591,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -831,7 +826,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -846,7 +840,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -861,7 +854,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -876,7 +868,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -891,7 +882,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1046,7 +1036,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1066,7 +1055,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1086,7 +1074,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1106,7 +1093,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1126,7 +1112,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1361,7 +1346,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1375,7 +1359,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1389,7 +1372,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1403,7 +1385,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1417,7 +1398,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -3951,15 +3931,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -3983,15 +3959,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -4015,15 +3987,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -4047,15 +4015,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4079,15 +4043,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4147,9 +4107,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -4165,9 +4123,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -4183,9 +4139,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -4201,9 +4155,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -4219,9 +4171,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -4236,9 +4186,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -4252,9 +4200,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -4268,9 +4214,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -4284,9 +4228,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -4300,9 +4242,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -4316,9 +4256,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -4332,9 +4270,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -4348,9 +4284,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -4364,9 +4298,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -4380,9 +4312,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -4396,15 +4326,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4422,15 +4348,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4448,15 +4370,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4474,15 +4392,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4500,15 +4414,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4666,15 +4576,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -4698,15 +4604,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -4730,15 +4632,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -4762,15 +4660,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4794,15 +4688,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4862,9 +4752,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -4880,9 +4768,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -4898,9 +4784,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -4916,9 +4800,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -4934,9 +4816,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -4951,9 +4831,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -4967,9 +4845,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -4983,9 +4859,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -4999,9 +4873,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -5015,9 +4887,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -5031,9 +4901,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -5047,9 +4915,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -5063,9 +4929,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -5079,9 +4943,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -5095,9 +4957,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -5111,15 +4971,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5137,15 +4993,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5163,15 +5015,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5189,15 +5037,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5215,15 +5059,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -5381,15 +5221,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -5413,15 +5249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -5445,15 +5277,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -5477,15 +5305,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x9,
x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5509,15 +5333,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5576,9 +5396,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -5593,9 +5411,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -5610,9 +5426,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -5627,9 +5441,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -5644,9 +5456,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -5661,9 +5471,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -5677,9 +5485,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -5693,9 +5499,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -5709,9 +5513,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -5725,9 +5527,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -5741,9 +5541,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -5757,9 +5555,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -5773,9 +5569,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -5789,9 +5583,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -5805,9 +5597,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -5821,15 +5611,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; 
-O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5847,15 +5633,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5873,15 +5655,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5899,15 +5677,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5925,15 +5699,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6091,15 +5861,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 
+; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -6123,15 +5889,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -6155,15 +5917,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -6187,15 +5945,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6219,15 +5973,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6286,9 +6036,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -6303,9 +6051,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -6320,9 +6066,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: 
atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -6337,9 +6081,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -6354,9 +6096,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -6371,9 +6111,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -6387,9 +6125,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -6403,9 +6139,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -6419,9 +6153,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -6435,9 +6167,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -6451,9 +6181,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -6467,9 +6195,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands 
w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -6483,9 +6209,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -6499,9 +6223,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -6515,9 +6237,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -6531,15 +6251,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6557,15 +6273,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6583,15 +6295,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6609,15 +6317,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, 
w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6635,15 +6339,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll @@ -556,7 +556,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -579,7 +578,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -602,7 +600,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -625,7 +622,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -648,7 +644,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -886,7 +881,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -901,7 +895,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -916,7 +909,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -931,7 +923,6 @@ 
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -946,7 +937,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1181,7 +1171,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1203,7 +1192,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1225,7 +1213,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1247,7 +1234,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1269,7 +1255,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1526,7 +1511,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1540,7 +1524,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1554,7 +1537,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1568,7 +1550,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1582,7 +1563,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -4257,9 +4237,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, 
w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4280,9 +4258,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4303,9 +4279,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4326,9 +4300,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4349,9 +4321,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4372,9 +4342,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4394,9 +4362,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4416,9 +4382,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4438,9 +4402,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4460,9 +4422,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -4481,9 +4441,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4501,9 +4459,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, 
gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4521,9 +4477,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4541,9 +4495,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4561,9 +4513,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -4581,9 +4531,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4601,9 +4549,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4621,9 +4567,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4641,9 +4585,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4661,9 +4603,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -4681,15 +4621,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] 
@@ -4715,15 +4651,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4749,15 +4681,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -4783,15 +4711,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4817,15 +4741,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -4852,9 +4772,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4875,9 +4793,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4898,9 +4814,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4921,9 +4835,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; 
-O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4944,9 +4856,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -4967,9 +4877,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -4985,9 +4893,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -5003,9 +4909,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -5021,9 +4925,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -5039,9 +4941,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -5056,9 +4956,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -5072,9 +4970,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -5088,9 +4984,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -5104,9 +4998,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; 
-O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -5120,9 +5012,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -5136,9 +5026,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -5152,9 +5040,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -5168,9 +5054,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -5184,9 +5068,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -5200,9 +5082,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -5216,15 +5096,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5242,15 +5118,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, 
w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5268,15 +5140,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5294,15 +5162,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5320,15 +5184,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5347,9 +5207,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5370,9 +5228,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5393,9 +5249,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5416,9 +5270,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5439,9 +5291,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: 
stlxrb w8, w12, [x11] @@ -5462,9 +5312,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5484,9 +5332,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5506,9 +5352,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5528,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5550,9 +5392,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5571,9 +5411,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5591,9 +5429,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5611,9 +5447,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5631,9 +5465,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5651,9 +5483,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5671,9 +5501,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; 
-O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5691,9 +5519,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5711,9 +5537,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5731,9 +5555,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5751,9 +5573,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5771,15 +5591,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5805,15 +5621,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5839,15 +5651,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5873,15 +5681,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr 
%ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5907,15 +5711,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5942,9 +5742,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5965,9 +5763,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5988,9 +5784,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6011,9 +5805,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6034,9 +5826,7 @@ ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6057,9 +5847,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -6075,9 +5863,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -6093,9 +5879,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne 
+; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -6111,9 +5895,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -6129,9 +5911,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -6146,9 +5926,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -6162,9 +5940,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -6178,9 +5954,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -6194,9 +5968,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -6210,9 +5982,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -6226,9 +5996,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -6242,9 +6010,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -6258,9 +6024,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: 
atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -6274,9 +6038,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -6290,9 +6052,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -6306,15 +6066,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6332,15 +6088,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6358,15 +6110,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6384,15 +6132,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands 
w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6410,15 +6154,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6437,9 +6177,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6460,9 +6198,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6483,9 +6219,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6506,9 +6240,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6529,9 +6261,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6281,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6572,9 +6300,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6593,9 +6319,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6614,9 +6338,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: 
atomicrmw_umax_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6635,9 +6357,7 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6656,9 +6376,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6676,9 +6394,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6696,9 +6412,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6716,9 +6430,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6736,9 +6448,7 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, hi ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6756,9 +6466,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6776,9 +6484,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6796,9 +6502,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6816,9 +6520,7 @@ define dso_local i64 
@atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6836,9 +6538,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, hi ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6856,15 +6556,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6890,15 +6586,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6924,15 +6616,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6958,15 +6646,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -6992,15 +6676,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, 
x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -7027,9 +6707,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7050,9 +6728,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7073,9 +6749,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7096,9 +6770,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7119,9 +6791,7 @@ ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, hi ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7141,9 +6811,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -7158,9 +6826,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -7175,9 +6841,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -7192,9 +6856,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -7209,9 +6871,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth 
-; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -7226,9 +6886,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -7242,9 +6900,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -7258,9 +6914,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -7274,9 +6928,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -7290,9 +6942,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -7306,9 +6956,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -7322,9 +6970,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -7338,9 +6984,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -7354,9 +6998,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -7370,9 +7012,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -7386,15 +7026,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7412,15 +7048,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7438,15 +7070,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7464,15 +7092,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7490,15 +7114,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs 
x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -7517,9 +7137,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7540,9 +7158,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7563,9 +7179,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7586,9 +7200,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7609,9 +7221,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -7631,9 +7241,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7652,9 +7260,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7673,9 +7279,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_release: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7694,9 +7298,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7715,9 +7317,7 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst: ; -O0: subs w10, w10, w9, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, 
w9, ls ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -7736,9 +7336,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7756,9 +7354,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7776,9 +7372,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7796,9 +7390,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7816,9 +7408,7 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, ls ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -7836,9 +7426,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7856,9 +7444,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7876,9 +7462,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7896,9 +7480,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7916,9 +7498,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 
-; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, ls ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -7936,15 +7516,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -7970,15 +7546,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8004,15 +7576,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -8038,15 +7606,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8072,15 +7636,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -8107,9 +7667,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] 
@@ -8130,9 +7688,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8153,9 +7709,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8176,9 +7730,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8199,9 +7751,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -8221,9 +7771,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -8238,9 +7786,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -8255,9 +7801,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -8272,9 +7816,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -8289,9 +7831,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -8306,9 +7846,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_umin_i32_unaligned_monotonic: @@ -8322,9 +7860,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -8338,9 +7874,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -8354,9 +7888,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -8370,9 +7902,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -8386,9 +7916,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -8402,9 +7930,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -8418,9 +7944,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -8434,9 +7958,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -8450,9 +7972,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -8466,15 +7986,11 @@ define dso_local i128 
@atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8492,15 +8008,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8518,15 +8030,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8544,15 +8052,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -8570,15 +8074,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll @@ -926,7 +926,6 @@ define dso_local i128 
@atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -1794,7 +1780,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x14, x8, x10
-; -O0: and w10, w8, #0x1
 ; -O0: ldaxp x10, x9, [x11]
 ; -O0: cmp x10, x12
 ; -O0: cmp x9, x13
@@ -2081,7 +2066,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2095,7 +2079,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2109,7 +2092,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2123,7 +2105,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2137,7 +2118,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -5392,9 +5372,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5415,9 +5393,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5438,9 +5414,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5461,9 +5435,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5484,9 +5456,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -5507,9 +5477,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5529,9 +5497,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5551,9 +5517,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5573,9 +5537,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5595,9 +5557,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -5616,9 +5576,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5636,9 +5594,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5656,9 +5612,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5676,9 +5630,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5696,9 +5648,7 @@
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, gt
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -5716,9 +5666,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5736,9 +5684,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5756,9 +5702,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5776,9 +5720,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5796,9 +5738,7 @@
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, gt
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -5816,15 +5756,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5850,15 +5786,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5884,15 +5816,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -5918,15 +5846,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5952,15 +5876,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -5987,9 +5907,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6010,9 +5928,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6033,9 +5949,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ;
-O0: ldaxp x10, x9, [x11] @@ -9139,15 +8711,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -9173,15 +8741,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9207,15 +8771,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -9242,9 +8802,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9265,9 +8823,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9288,9 +8844,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9311,9 +8865,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9334,9 +8886,7 @@ ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst: ; -O0: and w9, w10, #0xff ; -O0: subs w9, w9, w8, uxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, ls ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -9356,9 +8906,7 @@ define dso_local i16 
@atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -9373,9 +8921,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -9390,9 +8936,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -9407,9 +8951,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -9424,9 +8966,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -9441,9 +8981,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -9457,9 +8995,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -9473,9 +9009,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -9489,9 +9023,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -9505,9 +9037,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: 
atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -9521,9 +9051,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -9537,9 +9065,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -9553,9 +9079,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -9569,9 +9093,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -9585,9 +9107,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -9601,15 +9121,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9627,15 +9143,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9653,15 +9165,11 @@ define dso_local i128 
@atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9679,15 +9187,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -9705,15 +9209,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll @@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, 
[x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: 
bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; 
-O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; 
-O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne 
+; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6056,9 +5970,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6079,9 +5991,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6102,9 +6012,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: @@ -6120,9 +6028,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: @@ -6138,9 +6044,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: @@ -6156,9 +6060,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: @@ -6174,9 +6076,7 @@ ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: @@ -6191,9 +6091,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: @@ -6207,9 +6105,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: 
ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: @@ -6223,9 +6119,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: @@ -6239,9 +6133,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: @@ -6255,9 +6147,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: @@ -6271,9 +6161,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: @@ -6287,9 +6175,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: @@ -6303,9 +6189,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: @@ -6319,9 +6203,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: @@ -6335,9 +6217,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, gt ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: @@ -6351,15 +6231,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, 
w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6377,15 +6253,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6403,15 +6275,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6429,15 +6297,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6455,15 +6319,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6482,9 +6342,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6505,9 +6363,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6528,9 +6384,7 @@ ; 
-O0-LABEL: atomicrmw_min_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6551,9 +6405,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6574,9 +6426,7 @@ ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, le ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6597,9 +6447,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6619,9 +6467,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6641,9 +6487,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6663,9 +6507,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6685,9 +6527,7 @@ ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -6706,9 +6546,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6726,9 +6564,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6746,9 +6582,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6766,9 +6600,7 
@@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6786,9 +6618,7 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, le ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -6806,9 +6636,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6826,9 +6654,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6846,9 +6672,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6866,9 +6690,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6886,9 +6708,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, le ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -6906,15 +6726,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -6940,15 +6756,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9653,15 +9165,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9679,15 +9187,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9705,15 +9209,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -546,7 +546,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -567,7 +566,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -588,7 +586,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -609,7 +606,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -630,7 +626,6 @@
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0: adds x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
@@ -866,7 +861,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -881,7 +875,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -896,7 +889,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -911,7 +903,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -926,7 +917,6 @@
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0: adds x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: subs w11, w11, #1
 ; -O0: bl __atomic_compare_exchange
 ;
@@ -1081,7 +1071,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: casp x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1101,7 +1090,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspa x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1121,7 +1109,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspl x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1141,7 +1128,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1161,7 +1147,6 @@
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0: subs x2, x9, x11
-; -O0: and w11, w9, #0x1
 ; -O0: caspal x0, x1, x2, x3, [x8]
 ; -O0: eor x8, x10, x8
 ; -O0: eor x11, x9, x11
@@ -1396,7 +1381,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1410,7 +1394,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1424,7 +1407,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1438,7 +1420,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1452,7 +1433,6 @@
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0: subs x9, x8, x9
-; -O0: and w11, w8, #0x1
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -4076,15 +4056,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: casp x0, x1, x2, x3, [x8]
@@ -4108,15 +4084,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspa x0, x1, x2, x3, [x8]
@@ -4140,15 +4112,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspl x0, x1, x2, x3, [x8]
@@ -4172,15 +4140,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4204,15 +4168,11 @@
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O0: subs x9, x9, x10
-; -O0: subs x9, x9, x10
 ; -O0: subs x9, x9, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w9, w9, w11, ne
-; -O0: and w13, w9, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x2, x11, x12, ne
-; -O0: and w11, w9, #0x1
+; -O0: subs x13, x13, x10
+; -O0: csel w11, w9, w11, eq
+; -O0: ands w13, w11, #0x1
+; -O0: csel x2, x9, x12, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x9, x9, x10, ne
 ; -O0: caspal x0, x1, x2, x3, [x8]
@@ -4272,9 +4232,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -4290,9 +4248,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -4308,9 +4264,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -4326,9 +4280,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -4344,9 +4296,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -4361,9 +4311,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -4377,9 +4325,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -4393,9 +4339,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -4409,9 +4353,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -4425,9 +4367,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -4441,9 +4381,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -4457,9 +4395,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -4473,9 +4409,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -4489,9 +4423,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -4505,9 +4437,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -4521,15 +4451,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4547,15 +4473,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4573,15 +4495,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4599,15 +4517,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -4625,15 +4539,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr,
i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -4791,15 +4701,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -4823,15 +4729,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -4855,15 +4757,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -4887,15 +4785,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4919,15 +4813,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; 
-O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -4987,9 +4877,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: @@ -5005,9 +4893,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: @@ -5023,9 +4909,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_release: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: @@ -5041,9 +4925,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: @@ -5059,9 +4941,7 @@ ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O0: sxth w10, w9 ; -O0: subs w10, w10, w8, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: @@ -5076,9 +4956,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: @@ -5092,9 +4970,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: @@ -5108,9 +4984,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: @@ -5124,9 +4998,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: @@ -5140,9 +5012,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, 
w8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: @@ -5156,9 +5026,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: @@ -5172,9 +5040,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: @@ -5188,9 +5054,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: @@ -5204,9 +5068,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: @@ -5220,9 +5082,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, le ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: @@ -5236,15 +5096,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5262,15 +5118,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5288,15 +5140,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, 
#0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5314,15 +5162,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5340,15 +5184,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5506,15 +5346,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -5538,15 +5374,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -5570,15 +5402,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, 
x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -5602,15 +5430,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5634,15 +5458,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -5701,9 +5521,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: @@ -5718,9 +5536,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: @@ -5735,9 +5551,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: @@ -5752,9 +5566,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: @@ -5769,9 +5581,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: @@ -5786,9 +5596,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: @@ -5802,9 +5610,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: @@ -5818,9 +5624,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: @@ -5834,9 +5638,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: @@ -5850,9 +5652,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: @@ -5866,9 +5666,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: @@ -5882,9 +5680,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: @@ -5898,9 +5694,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: @@ -5914,9 +5708,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: @@ -5930,9 +5722,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, hi ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: @@ -5946,15 +5736,11 @@ define dso_local i128 
@atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5972,15 +5758,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -5998,15 +5780,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6024,15 +5802,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6050,15 +5824,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6216,15 +5986,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, 
ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: casp x0, x1, x2, x3, [x8] @@ -6248,15 +6014,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspa x0, x1, x2, x3, [x8] @@ -6280,15 +6042,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_release: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspl x0, x1, x2, x3, [x8] @@ -6312,15 +6070,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6344,15 +6098,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O0: subs x9, x9, x10 -; -O0: subs x9, x9, x10 ; -O0: subs x9, x9, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w9, w9, w11, ne -; -O0: and w13, w9, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x2, x11, x12, ne -; -O0: and w11, w9, #0x1 +; -O0: subs x13, x13, x10 +; -O0: csel w11, w9, w11, eq +; -O0: ands w13, w11, #0x1 +; -O0: csel x2, x9, x12, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x9, x9, x10, ne ; -O0: caspal x0, x1, x2, x3, [x8] @@ -6411,9 +6161,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: @@ -6428,9 +6176,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: @@ -6445,9 +6191,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: @@ -6462,9 +6206,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: @@ -6479,9 +6221,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) { ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O0: subs w10, w10, w8, uxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: @@ -6496,9 +6236,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: @@ -6512,9 +6250,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: @@ -6528,9 +6264,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: @@ -6544,9 +6278,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: @@ -6560,9 +6292,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: ; -O0: subs w10, w9, w8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w8, w9, w8, ne +; -O0: csel w8, w9, w8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: @@ -6576,9 +6306,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: @@ -6592,9 +6320,7 @@ 
define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: @@ -6608,9 +6334,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: @@ -6624,9 +6348,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: @@ -6640,9 +6362,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: ; -O0: subs x10, x9, x8 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x8, x9, x8, ne +; -O0: csel x8, x9, x8, ls ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: @@ -6656,15 +6376,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6682,15 +6398,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6708,15 +6420,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6734,15 +6442,11 @@ define dso_local i128 
@atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange @@ -6760,15 +6464,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x10 -; -O0: subs x8, x8, x11 -; -O0: and w12, w12, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel w8, w8, w9, ne -; -O0: and w12, w8, #0x1 -; -O0: ands w12, w12, #0x1 -; -O0: csel x9, x9, x11, ne -; -O0: and w11, w8, #0x1 +; -O0: subs x8, x8, x9 +; -O0: subs x12, x12, x10 +; -O0: csel w11, w8, w11, eq +; -O0: ands w12, w11, #0x1 +; -O0: csel x9, x8, x9, ne ; -O0: ands w11, w11, #0x1 ; -O0: csel x8, x8, x10, ne ; -O0: bl __atomic_compare_exchange diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll @@ -926,7 +926,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -949,7 +948,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -972,7 +970,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -995,7 +992,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1018,7 +1014,6 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O0: adds x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: subs w10, w10, #1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 @@ -1306,7 +1301,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1321,7 +1315,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1336,7 +1329,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 
%value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1351,7 +1343,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1366,7 +1357,6 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O0: adds x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; @@ -1706,7 +1696,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1728,7 +1717,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1750,7 +1738,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1772,7 +1759,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -1794,7 +1780,6 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O0: subs x14, x8, x10 -; -O0: and w10, w8, #0x1 ; -O0: ldaxp x10, x9, [x11] ; -O0: cmp x10, x12 ; -O0: cmp x9, x13 @@ -2081,7 +2066,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2095,7 +2079,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2109,7 +2092,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2123,7 +2105,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2137,7 +2118,6 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O0: subs x9, x8, x9 -; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_sub_i128_unaligned_seq_cst: @@ -5392,9 +5372,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5415,9 +5393,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5438,9 +5414,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5461,9 +5435,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5484,9 +5456,7 @@ ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -5507,9 +5477,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5529,9 +5497,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5551,9 +5517,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_release: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5573,9 +5537,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5595,9 +5557,7 @@ ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst: ; -O0: sxth w10, w8 ; -O0: subs w10, w10, w9, sxth -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxrh w9, [x11] ; -O0: cmp w9, w8, uxth ; -O0: stlxrh w10, w12, [x11] @@ -5616,9 +5576,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5636,9 +5594,7 @@ define dso_local i32 
@atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5656,9 +5612,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_release: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5676,9 +5630,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5696,9 +5648,7 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) { ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst: ; -O0: subs w10, w8, w9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel w12, w8, w9, ne +; -O0: csel w12, w8, w9, gt ; -O0: ldaxr w9, [x11] ; -O0: cmp w9, w8 ; -O0: stlxr w10, w12, [x11] @@ -5716,9 +5666,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5736,9 +5684,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5756,9 +5702,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_release: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5776,9 +5720,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5796,9 +5738,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) { ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst: ; -O0: subs x10, x8, x9 -; -O0: and w10, w10, #0x1 -; -O0: ands w10, w10, #0x1 -; -O0: csel x12, x8, x9, ne +; -O0: csel x12, x8, x9, gt ; -O0: ldaxr x9, [x11] ; -O0: cmp x9, x8 ; -O0: stlxr w10, x12, [x11] @@ -5816,15 +5756,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: 
csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5850,15 +5786,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5884,15 +5816,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_release: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldxp x10, x9, [x11] @@ -5918,15 +5846,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5952,15 +5876,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O0: subs x8, x8, x9 -; -O0: subs x8, x8, x9 ; -O0: subs x8, x8, x12 -; -O0: and w13, w13, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel w8, w8, w10, ne -; -O0: and w13, w8, #0x1 -; -O0: ands w13, w13, #0x1 -; -O0: csel x14, x10, x12, ne -; -O0: and w10, w8, #0x1 +; -O0: subs x13, x13, x9 +; -O0: csel w10, w8, w10, eq +; -O0: ands w13, w10, #0x1 +; -O0: csel x14, x8, x12, ne ; -O0: ands w10, w10, #0x1 ; -O0: csel x15, x8, x9, ne ; -O0: ldaxp x10, x9, [x11] @@ -5987,9 +5907,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6010,9 +5928,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel w12, w10, w8, gt ; -O0: ldaxrb w9, [x11] ; -O0: cmp w9, w10, uxtb ; -O0: stlxrb w8, w12, [x11] @@ -6033,9 +5949,7 @@ ; -O0-LABEL: atomicrmw_max_i8_unaligned_release: ; -O0: sxtb w9, w10 ; -O0: subs w9, w9, w8, sxtb -; -O0: and w9, w9, #0x1 -; -O0: ands w9, w9, #0x1 -; -O0: csel w12, w10, w8, ne +; -O0: csel 
w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6056,9 +5970,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6079,9 +5991,7 @@
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, gt
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6102,9 +6012,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
@@ -6120,9 +6028,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
@@ -6138,9 +6044,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
@@ -6156,9 +6060,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
@@ -6174,9 +6076,7 @@
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
@@ -6191,9 +6091,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
@@ -6207,9 +6105,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
@@ -6223,9 +6119,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
@@ -6239,9 +6133,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
@@ -6255,9 +6147,7 @@
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
@@ -6271,9 +6161,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
@@ -6287,9 +6175,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
@@ -6303,9 +6189,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
@@ -6319,9 +6203,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
@@ -6335,9 +6217,7 @@
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, gt
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
@@ -6351,15 +6231,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6377,15 +6253,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6403,15 +6275,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6429,15 +6297,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6455,15 +6319,11 @@
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -6482,9 +6342,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6505,9 +6363,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6528,9 +6384,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6551,9 +6405,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6574,9 +6426,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -6597,9 +6447,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6619,9 +6467,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6641,9 +6487,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6663,9 +6507,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6685,9 +6527,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0: sxth w10, w8
 ; -O0: subs w10, w10, w9, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -6706,9 +6546,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6726,9 +6564,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6746,9 +6582,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6766,9 +6600,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6786,9 +6618,7 @@
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, le
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -6806,9 +6636,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6826,9 +6654,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6846,9 +6672,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6866,9 +6690,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6886,9 +6708,7 @@
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, le
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -6906,15 +6726,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -6940,15 +6756,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -6974,15 +6786,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -7008,15 +6816,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7042,15 +6846,11 @@
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -7077,9 +6877,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7100,9 +6898,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7123,9 +6919,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7146,9 +6940,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7169,9 +6961,7 @@
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0: sxtb w9, w10
 ; -O0: subs w9, w9, w8, sxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, le
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7192,9 +6982,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
@@ -7210,9 +6998,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
@@ -7228,9 +7014,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
@@ -7246,9 +7030,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
@@ -7264,9 +7046,7 @@
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0: sxth w10, w9
 ; -O0: subs w10, w10, w8, sxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
@@ -7281,9 +7061,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
@@ -7297,9 +7075,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
@@ -7313,9 +7089,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
@@ -7329,9 +7103,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
@@ -7345,9 +7117,7 @@
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
@@ -7361,9 +7131,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
@@ -7377,9 +7145,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
@@ -7393,9 +7159,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
@@ -7409,9 +7173,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
@@ -7425,9 +7187,7 @@
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, le
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
@@ -7441,15 +7201,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7467,15 +7223,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7493,15 +7245,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7519,15 +7267,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7545,15 +7289,11 @@
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -7572,9 +7312,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7595,9 +7333,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7618,9 +7354,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7641,9 +7375,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7664,9 +7396,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -7686,9 +7416,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7707,9 +7435,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7728,9 +7454,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7749,9 +7473,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7770,9 +7492,7 @@
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -7791,9 +7511,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7811,9 +7529,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7831,9 +7547,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7851,9 +7565,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7871,9 +7583,7 @@
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, hi
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -7891,9 +7601,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7911,9 +7619,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7931,9 +7637,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7951,9 +7655,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7971,9 +7673,7 @@
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, hi
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -7991,15 +7691,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8025,15 +7721,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8059,15 +7751,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -8093,15 +7781,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8127,15 +7811,11 @@
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -8162,9 +7842,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8185,9 +7863,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8208,9 +7884,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8231,9 +7905,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8254,9 +7926,7 @@
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, hi
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8276,9 +7946,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
@@ -8293,9 +7961,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
@@ -8310,9 +7976,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
@@ -8327,9 +7991,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
@@ -8344,9 +8006,7 @@
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
@@ -8361,9 +8021,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
@@ -8377,9 +8035,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
@@ -8393,9 +8049,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
@@ -8409,9 +8063,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
@@ -8425,9 +8077,7 @@
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
@@ -8441,9 +8091,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
@@ -8457,9 +8105,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
@@ -8473,9 +8119,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
@@ -8489,9 +8133,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
@@ -8505,9 +8147,7 @@
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, hi
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
@@ -8521,15 +8161,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8547,15 +8183,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8573,15 +8205,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8599,15 +8227,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8625,15 +8249,11 @@
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -8652,9 +8272,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8675,9 +8293,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8698,9 +8314,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8721,9 +8335,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8744,9 +8356,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -8766,9 +8376,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8787,9 +8395,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8808,9 +8414,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8829,9 +8433,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8850,9 +8452,7 @@
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0: subs w10, w10, w9, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxrh w9, [x11]
 ; -O0: cmp w9, w8, uxth
 ; -O0: stlxrh w10, w12, [x11]
@@ -8871,9 +8471,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8891,9 +8489,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8911,9 +8507,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8931,9 +8525,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8951,9 +8543,7 @@
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O0: subs w10, w8, w9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w12, w8, w9, ne
+; -O0: csel w12, w8, w9, ls
 ; -O0: ldaxr w9, [x11]
 ; -O0: cmp w9, w8
 ; -O0: stlxr w10, w12, [x11]
@@ -8971,9 +8561,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -8991,9 +8579,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9011,9 +8597,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9031,9 +8615,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9051,9 +8633,7 @@
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O0: subs x10, x8, x9
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x12, x8, x9, ne
+; -O0: csel x12, x8, x9, ls
 ; -O0: ldaxr x9, [x11]
 ; -O0: cmp x9, x8
 ; -O0: stlxr w10, x12, [x11]
@@ -9071,15 +8651,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9105,15 +8681,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9139,15 +8711,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldxp x10, x9, [x11]
@@ -9173,15 +8741,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9207,15 +8771,11 @@
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O0: subs x8, x8, x9
-; -O0: subs x8, x8, x9
 ; -O0: subs x8, x8, x12
-; -O0: and w13, w13, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel w8, w8, w10, ne
-; -O0: and w13, w8, #0x1
-; -O0: ands w13, w13, #0x1
-; -O0: csel x14, x10, x12, ne
-; -O0: and w10, w8, #0x1
+; -O0: subs x13, x13, x9
+; -O0: csel w10, w8, w10, eq
+; -O0: ands w13, w10, #0x1
+; -O0: csel x14, x8, x12, ne
 ; -O0: ands w10, w10, #0x1
 ; -O0: csel x15, x8, x9, ne
 ; -O0: ldaxp x10, x9, [x11]
@@ -9242,9 +8802,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9265,9 +8823,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9288,9 +8844,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9311,9 +8865,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9334,9 +8886,7 @@
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0: and w9, w10, #0xff
 ; -O0: subs w9, w9, w8, uxtb
-; -O0: and w9, w9, #0x1
-; -O0: ands w9, w9, #0x1
-; -O0: csel w12, w10, w8, ne
+; -O0: csel w12, w10, w8, ls
 ; -O0: ldaxrb w9, [x11]
 ; -O0: cmp w9, w10, uxtb
 ; -O0: stlxrb w8, w12, [x11]
@@ -9356,9 +8906,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
@@ -9373,9 +8921,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
@@ -9390,9 +8936,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
@@ -9407,9 +8951,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
@@ -9424,9 +8966,7 @@
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0: subs w10, w10, w8, uxth
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
@@ -9441,9 +8981,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
@@ -9457,9 +8995,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
@@ -9473,9 +9009,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
@@ -9489,9 +9023,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
@@ -9505,9 +9037,7 @@
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
 ; -O0: subs w10, w9, w8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel w8, w9, w8, ne
+; -O0: csel w8, w9, w8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
@@ -9521,9 +9051,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
@@ -9537,9 +9065,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
@@ -9553,9 +9079,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
@@ -9569,9 +9093,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
@@ -9585,9 +9107,7 @@
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
 ; -O0: subs x10, x9, x8
-; -O0: and w10, w10, #0x1
-; -O0: ands w10, w10, #0x1
-; -O0: csel x8, x9, x8, ne
+; -O0: csel x8, x9, x8, ls
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
@@ -9601,15 +9121,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9627,15 +9143,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9653,15 +9165,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9679,15 +9187,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
@@ -9705,15 +9209,11 @@
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x10
-; -O0: subs x8, x8, x11
-; -O0: and w12, w12, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel w8, w8, w9, ne
-; -O0: and w12, w8, #0x1
-; -O0: ands w12, w12, #0x1
-; -O0: csel x9, x9, x11, ne
-; -O0: and w11, w8, #0x1
+; -O0: subs x8, x8, x9
+; -O0: subs x12, x12, x10
+; -O0: csel w11, w8, w11, eq
+; -O0: ands w12, w11, #0x1
+; -O0: csel x9, x8, x9, ne
 ; -O0: ands w11, w11, #0x1
 ; -O0: csel x8, x8, x10, ne
 ; -O0: bl __atomic_compare_exchange
diff --git
a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -1770,10 +1770,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb -; CHECK-NOLSE-O0-NEXT: cset w9, le -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, le ; CHECK-NOLSE-O0-NEXT: LBB33_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB33_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1843,10 +1840,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxtb w9, w10 ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, sxtb -; CHECK-NOLSE-O0-NEXT: cset w9, gt -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, gt ; CHECK-NOLSE-O0-NEXT: LBB34_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB34_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1917,10 +1911,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb -; CHECK-NOLSE-O0-NEXT: cset w9, ls -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ls ; CHECK-NOLSE-O0-NEXT: LBB35_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB35_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -1991,10 +1982,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: and w9, w10, #0xff ; CHECK-NOLSE-O0-NEXT: subs w9, w9, w8, uxtb -; CHECK-NOLSE-O0-NEXT: cset w9, hi -; CHECK-NOLSE-O0-NEXT: and w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w9, w9, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w10, w8, hi ; CHECK-NOLSE-O0-NEXT: LBB36_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB36_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2463,10 +2451,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth -; CHECK-NOLSE-O0-NEXT: cset w10, le -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, le ; CHECK-NOLSE-O0-NEXT: LBB43_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB43_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2536,10 +2521,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: sxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, sxth -; CHECK-NOLSE-O0-NEXT: cset w10, gt -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, gt ; CHECK-NOLSE-O0-NEXT: LBB44_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB44_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2610,10 +2592,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr 
w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth -; CHECK-NOLSE-O0-NEXT: cset w10, ls -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ls ; CHECK-NOLSE-O0-NEXT: LBB45_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB45_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2684,10 +2663,7 @@ ; CHECK-NOLSE-O0-NEXT: ldr w9, [sp, #24] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: uxth w10, w8 ; CHECK-NOLSE-O0-NEXT: subs w10, w10, w9, uxth -; CHECK-NOLSE-O0-NEXT: cset w10, hi -; CHECK-NOLSE-O0-NEXT: and w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: ands w10, w10, #0x1 -; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, ne +; CHECK-NOLSE-O0-NEXT: csel w12, w8, w9, hi ; CHECK-NOLSE-O0-NEXT: LBB46_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB46_1 Depth=1 ; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -2763,8 +2739,7 @@ ; CHECK-NOLSE-O0-NEXT: LBB47_3: ; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxtb -; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-NOLSE-O0-NEXT: cset w1, eq ; CHECK-NOLSE-O0-NEXT: ret ; ; CHECK-LSE-O1-LABEL: cmpxchg_i8: @@ -2784,8 +2759,7 @@ ; CHECK-LSE-O0-NEXT: casb w0, w2, [x8] ; CHECK-LSE-O0-NEXT: and w8, w0, #0xff ; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxtb -; CHECK-LSE-O0-NEXT: cset w8, eq -; CHECK-LSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-LSE-O0-NEXT: cset w1, eq ; CHECK-LSE-O0-NEXT: ret %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic ret { i8, i1 } %res @@ -2829,8 +2803,7 @@ ; CHECK-NOLSE-O0-NEXT: LBB48_3: ; CHECK-NOLSE-O0-NEXT: and w8, w0, #0xffff ; CHECK-NOLSE-O0-NEXT: subs w8, w8, w1, uxth -; CHECK-NOLSE-O0-NEXT: cset w8, eq -; CHECK-NOLSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-NOLSE-O0-NEXT: cset w1, eq ; CHECK-NOLSE-O0-NEXT: ret ; ; CHECK-LSE-O1-LABEL: cmpxchg_i16: @@ -2850,8 +2823,7 @@ ; CHECK-LSE-O0-NEXT: cash w0, w2, [x8] ; CHECK-LSE-O0-NEXT: and w8, w0, #0xffff ; CHECK-LSE-O0-NEXT: subs w8, w8, w1, uxth -; CHECK-LSE-O0-NEXT: cset w8, eq -; CHECK-LSE-O0-NEXT: and w1, w8, #0x1 +; CHECK-LSE-O0-NEXT: cset w1, eq ; CHECK-LSE-O0-NEXT: ret %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic ret { i16, i1 } %res diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir @@ -2,7 +2,7 @@ # Check that when we combine ZEXT/ANYEXT we assign the correct location. # CHECK: !8 = !DILocation(line: 23, column: 5, scope: !4) -# CHECK: G_AND %16, %15, debug-location !8 +# CHECK: G_AND %15, %16, debug-location !8 --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-debugloc.mir @@ -2,7 +2,7 @@ # Check that when we combine SEXT we assign the correct debug location. 
# CHECK: !9 = !DILocation(line: 36, column: 21, scope: !4) -# CHECK: G_AND %5, %4, debug-location !9 +# CHECK: G_AND %4, %5, debug-location !9 --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll @@ -1,9 +1,7 @@ ; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s define void @foo(i512 %in) { ; CHECK-LABEL: foo: -; CHECK: subs -; CHECK-NEXT: cset -; CHECK-NEXT: tbnz +; CHECK: cbz switch i512 %in, label %default [ i512 3923188584616675477397368389504791510063972152790021570560, label %l1 i512 3923188584616675477397368389504791510063972152790021570561, label %l2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/invoke-region.ll @@ -38,8 +38,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.continue: ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[C2]](s16), %bb.1, [[C3]](s16), %bb.2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]] ; CHECK-NEXT: $w0 = COPY [[AND]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -95,8 +95,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.continue: ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[ANYEXT]](s16), %bb.1, [[C2]](s16), %bb.2 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] ; CHECK-NEXT: $w0 = COPY [[AND]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll @@ -38,6 +38,7 @@ ; TRANSLATED-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp ; TRANSLATED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; TRANSLATED-NEXT: G_BR %bb.2 + ; ; PRESELECTION-LABEL: name: test ; PRESELECTION: bb.1.entry: ; PRESELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -50,8 +51,8 @@ ; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000 ; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C1]] - ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; PRESELECTION-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8) + ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ANYEXT]], [[C2]] ; PRESELECTION-NEXT: G_BRCOND [[AND]](s32), %bb.3 ; PRESELECTION-NEXT: G_BR %bb.2 @@ -69,6 +70,7 @@ ; PRESELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp ; PRESELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; PRESELECTION-NEXT: G_BR %bb.2 + ; ; 
POSTSELECTION-LABEL: name: test ; POSTSELECTION: bb.1.entry: ; POSTSELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir @@ -10,9 +10,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) %0:_(s64) = COPY $x0 @@ -37,11 +35,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x2 = COPY [[UADDE2]](s64) @@ -89,9 +84,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) %0:_(s64) = COPY $x0 @@ -119,9 +112,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) %0:_(s64) = COPY $x0 @@ -367,15 +358,15 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2 - ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; 
CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16) - ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 %1:_(s8) = COPY $b0 %2:_(s8) = COPY $b1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir @@ -145,8 +145,8 @@ ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[DEF]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 ; CHECK: %ext:_(s64) = G_AND [[ANYEXT]], [[C1]] ; CHECK: $x0 = COPY %ext(s64) ; CHECK: RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -9,8 +9,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[ICMP]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -50,11 +50,8 @@ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C1]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[C1]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ICMP2]], [[ICMP]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C2]] - ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: G_BRCOND [[SELECT]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -91,9 +88,7 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -132,9 +127,7 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -173,9 +166,7 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s64), [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -214,9 +205,7 @@ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[AND1]], [[AND3]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: G_BRCOND [[AND4]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -278,9 +267,7 @@ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[XOR6]] ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[XOR7]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR6]](s64), [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: G_BRCOND [[AND16]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -326,9 +313,7 @@ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[XOR2]] ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[XOR3]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: G_BRCOND [[AND8]](s32), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir @@ -292,18 +292,13 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[AND]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C3]] - ; 
CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C4]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[AND2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C2]], [[C2]], [[UADDO1]] ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[AND1]](s64) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[UADDO]], [[CTLZ1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[UADDE]], [[C2]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C5]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C4]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[AND5]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[SELECT]], [[C4]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SELECT1]], [[C2]], [[USUBO1]] ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir @@ -172,8 +172,8 @@ ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) @@ -211,8 +211,8 @@ ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) @@ -250,8 +250,8 @@ ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) @@ -288,8 +288,8 @@ ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir @@ -19,6 +19,7 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; ; CHECK-CSSC-LABEL: name: s8 ; CHECK-CSSC: liveins: $w0 ; CHECK-CSSC-NEXT: {{ $}} @@ -51,6 +52,7 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; ; CHECK-CSSC-LABEL: name: s16 ; CHECK-CSSC: liveins: $w0 ; CHECK-CSSC-NEXT: {{ $}} @@ -83,6 +85,7 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK-NEXT: $w0 = COPY [[CTLZ]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; ; CHECK-CSSC-LABEL: name: s32 ; CHECK-CSSC: liveins: $w0 ; CHECK-CSSC-NEXT: {{ $}} @@ -112,6 +115,7 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64) ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; ; CHECK-CSSC-LABEL: name: s64 ; CHECK-CSSC: liveins: $x0 ; CHECK-CSSC-NEXT: {{ $}} @@ -144,6 +148,7 @@ ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<4 x s32>) = G_CTPOP [[AND]](<4 x s32>) ; CHECK-NEXT: $q0 = COPY [[CTPOP]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; ; CHECK-CSSC-LABEL: name: v4s32 ; CHECK-CSSC: liveins: $q0 ; CHECK-CSSC-NEXT: {{ $}} @@ -180,6 +185,7 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64) ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; ; CHECK-CSSC-LABEL: name: s35 ; CHECK-CSSC: liveins: $x0 ; CHECK-CSSC-NEXT: {{ $}} @@ -218,17 +224,15 @@ ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTLZ]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]] ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s64) = G_BITREVERSE [[OR]] ; CHECK-NEXT: [[CTLZ1:%[0-9]+]]:_(s64) = G_CTLZ [[BITREVERSE1]](s64) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTLZ1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTLZ1]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: $x1 = COPY [[SELECT1]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + ; ; CHECK-CSSC-LABEL: name: s65 ; CHECK-CSSC: liveins: $x0, $x1 ; CHECK-CSSC-NEXT: {{ $}} @@ -242,13 +246,10 @@ ; CHECK-CSSC-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ [[OR1]](s64) ; CHECK-CSSC-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK-CSSC-NEXT: 
[[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[CTTZ]], [[C2]] - ; CHECK-CSSC-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C3]] - ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[AND]] + ; CHECK-CSSC-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[C]], [[C]], [[UADDO1]] ; CHECK-CSSC-NEXT: [[CTTZ1:%[0-9]+]]:_(s64) = G_CTTZ [[OR]](s64) - ; CHECK-CSSC-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]] - ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[CTTZ1]] - ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDE]], [[C]] + ; CHECK-CSSC-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[CTTZ1]] + ; CHECK-CSSC-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDE]], [[C]] ; CHECK-CSSC-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-CSSC-NEXT: $x1 = COPY [[SELECT1]](s64) ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-div.mir @@ -6,20 +6,20 @@ bb.0.entry: ; CHECK-LABEL: name: test_div ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: $w0 = COPY [[SDIV]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; CHECK: $w0 = COPY [[UDIV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8 + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] + ; CHECK-NEXT: $w0 = COPY [[UDIV]](s32) %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 %2:_(s8) = G_TRUNC %0(s64) @@ -43,17 +43,18 @@ ; CHECK-LABEL: name: sdiv_v4s32 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]] - ; CHECK: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]] - ; CHECK: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]] - ; CHECK: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32) - ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV4]] + ; CHECK-NEXT: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV5]] + ; CHECK-NEXT: [[SDIV2:%[0-9]+]]:_(s32) = G_SDIV [[UV2]], [[UV6]] + ; CHECK-NEXT: [[SDIV3:%[0-9]+]]:_(s32) = G_SDIV [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32), [[SDIV2]](s32), [[SDIV3]](s32) + ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s32>) = G_SDIV %0, %1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-cse.mir @@ -6,12 +6,12 @@ bb.0.entry: ; CHECK-LABEL: name: test_cse_in_legalizer ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK: $w0 = COPY [[COPY1]](s32) - ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) %0:_(s64) = COPY $x0 %1:_(s8) = G_TRUNC %0(s64) %19:_(s32) = G_ZEXT %1(s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext-csedebug-output.mir @@ -10,11 +10,11 @@ ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s8) = G_TRUNC ; CHECK: CSEInfo::Add MI: %{{[0-9]+}}:_(s32) = G_ZEXT - ; CHECK: CSEInfo::Recording new MI G_CONSTANT ; CHECK: CSEInfo::Recording new MI G_TRUNC + ; CHECK: CSEInfo::Recording new MI G_CONSTANT ; CHECK: CSEInfo::Recording new MI G_AND - ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_TRUNC + ; CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_CONSTANT ; 
CHECK: CSEInfo::Found Instr %{{[0-9]+}}:_(s32) = G_AND ; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1 ; CHECK: CSEInfo::CSE Hit for Opc {{[0-9]+}} : 1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir @@ -24,14 +24,14 @@ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC4]], 1 ; CHECK-NEXT: $w0 = COPY [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: $w0 = COPY [[AND1]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK-NEXT: $w0 = COPY [[TRUNC6]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC7]], [[C2]] ; CHECK-NEXT: $w0 = COPY [[AND2]](s32) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -291,8 +291,8 @@ ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: $w0 = COPY %ext(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir @@ -7,7 +7,9 @@ liveins: $x0 ; CHECK-LABEL: name: test_freeze_s64 - ; CHECK: %x0:_(s64) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x0:_(s64) = COPY $x0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %x0 ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64) %x0:_(s64) = COPY $x0 @@ -21,7 +23,9 @@ liveins: $q0 ; CHECK-LABEL: name: test_freeze_v4s32 - ; CHECK: %q0:_(<4 x s32>) = COPY $q0 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE %q0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) ; CHECK-NEXT: $x0 = COPY [[UV]](<2 x s32>) @@ -56,7 +60,9 @@ liveins: $d0 ; CHECK-LABEL: name: test_freeze_v2s32 - ; CHECK: %d0:_(<2 x s32>) = COPY $d0 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %d0:_(<2 x s32>) = COPY $d0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE %d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<2 x s32>) ; CHECK-NEXT: $w0 = COPY [[UV]](s32) @@ -74,7 +80,9 @@ liveins: $d0 ; CHECK-LABEL: name: test_freeze_v8s8 - ; CHECK: %d0:_(<8 x s8>) = COPY $d0 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: %d0:_(<8 x s8>) = COPY $d0 ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<8 x s8>) = G_FREEZE %d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[FREEZE]](<8 x s8>) ; CHECK-NEXT: $w0 = COPY [[UV]](<4 x s8>) @@ -91,10 +99,12 @@ bb.0.entry: liveins: $x0 ; CHECK-LABEL: name: test_freeze_s1 - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $x0 = COPY %ext(s64) %x:_(s1) = G_IMPLICIT_DEF @@ -108,10 +118,12 @@ bb.0.entry: liveins: $x0 ; CHECK-LABEL: name: test_freeze_s2 - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[FREEZE]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: $x0 = COPY %ext(s64) %x:_(s2) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir @@ -21,15 +21,13 @@ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -66,15 +64,13 @@ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; 
CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C3]](s64) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir @@ -23,12 +23,10 @@ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -67,12 +65,10 @@ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[COPY3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] ; CHECK-NEXT: $w0 = COPY [[OR]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -66,9 +66,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 1 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-min-max.mir @@ -18,9 +18,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -47,9 +45,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -76,9 +72,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -105,9 +99,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -136,9 +128,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -165,9 +155,7 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP 
intpred(ult), [[COPY]](s64), [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -194,9 +182,7 @@
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %0:_(s32) = COPY $w0
@@ -223,9 +209,7 @@
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[COPY1]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir
@@ -274,12 +274,12 @@
     ; CHECK-LABEL: name: test_uitofp_v2s64_v2i1
     ; CHECK: liveins: $q0
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
     ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>)
     ; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -688,8 +688,8 @@
     ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
     ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD %ptr(p0) :: (load (s8))
     ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: %ext:_(s64) = G_AND [[ANYEXT]], [[C]]
     ; CHECK-NEXT: $x0 = COPY %ext(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -6,14 +6,11 @@
 body: |
   bb.0:
     ; CHECK-LABEL: name: test_merge_s4
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s64)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]]
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
     ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64)
     %0:_(s64) = G_CONSTANT i64 0
@@ -29,7 +26,6 @@
 body: |
   bb.0:
-    ; This isn't legal but we don't support widening the destination type.
     ; CHECK-LABEL: name: test_merge_s16_s8
     ; CHECK: %a:_(s32) = COPY $w0
     ; CHECK-NEXT: %b:_(s32) = COPY $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -131,8 +131,8 @@
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %lhs_wide, [[C]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND %rhs_wide, [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs_wide(s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
     ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs_wide(s32)
     ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C1]]
@@ -223,8 +223,8 @@
     ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
     ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store (s64), align 1)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
     ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
     ; CHECK-NEXT: $x1 = COPY [[AND]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir
@@ -52,8 +52,8 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI %6(p0), %bb.2, [[DEF]](p0), %bb.0
     ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI %22(s16), %bb.2, [[DEF1]](s16), %bb.0
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.3
     ; CHECK-NEXT: {{ $}}
@@ -71,10 +71,8 @@
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[COPY]]
     ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI]], [[C2]](s64)
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ICMP1]](s32)
-    ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.3
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.3
     ; CHECK-NEXT: G_BR %bb.1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.3.bb10:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
@@ -32,8 +32,7 @@
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-    ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: G_BR %bb.2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.1:
@@ -51,10 +50,10 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.3:
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
-    ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
   bb.0:
     ; Test that we insert legalization artifacts(Truncs here) into the correct BBs
@@ -185,8 +184,7 @@
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]]
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: G_BR %bb.2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.1:
@@ -203,10 +201,10 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.3:
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[TRUNC1]](s16), %bb.2
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
-    ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
   bb.0:
     successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -281,14 +279,13 @@
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32)
-    ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.2:
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]]
-    ; CHECK-NEXT: $w0 = COPY [[AND2]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]]
+    ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
   bb.0:
     successors: %bb.1(0x80000000)
@@ -342,13 +339,11 @@
     ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[C]](s16), %bb.0, [[PHI]](s16), %bb.1
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[COPY]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: G_BRCOND [[AND1]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.2:
     ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
@@ -412,8 +407,7 @@
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[C]]
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: G_BR %bb.2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.1:
@@ -433,12 +427,12 @@
     ; CHECK-NEXT: bb.3:
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.1, [[TRUNC2]](s16), %bb.2
     ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.1, [[C3]](s16), %bb.2
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]]
-    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C4]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]]
     ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
   bb.0:
@@ -524,31 +518,28 @@
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32)
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
+    ; CHECK-NEXT: G_BRCOND [[ICMP]](s32), %bb.1
     ; CHECK-NEXT: G_BR %bb.2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.1:
     ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC1]](s16), %bb.0, %22(s16), %bb.1
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
-    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[C2]]
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C4]]
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[ADD1]](s32), [[C3]]
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C5]]
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 43
-    ; CHECK-NEXT: G_BRCOND [[AND2]](s32), %bb.2
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 43
+    ; CHECK-NEXT: G_BRCOND [[ICMP1]](s32), %bb.2
     ; CHECK-NEXT: G_BR %bb.1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.2:
     ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[PHI]](s16), %bb.1, [[TRUNC]](s16), %bb.0
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s16)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]]
-    ; CHECK-NEXT: $w0 = COPY [[AND3]](s32)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C6]]
+    ; CHECK-NEXT: $w0 = COPY [[AND1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
   bb.0:
     successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -671,8 +662,8 @@
     ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
     ; CHECK-NEXT: liveins: $x0
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
     ; CHECK-NEXT: G_BR %bb.2
@@ -727,8 +718,8 @@
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64)
     ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
     ; CHECK-NEXT: G_BR %bb.1
@@ -778,8 +769,8 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
     ; CHECK-NEXT: G_BR %bb.1
@@ -824,8 +815,8 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF1]], [[C]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
     ; CHECK-NEXT: G_BR %bb.1
@@ -919,8 +910,8 @@
     ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64)
     ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
     ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD1]](<2 x s64>)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C1]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
     ; CHECK-NEXT: G_BR %bb.1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptrtoint.mir
@@ -83,8 +83,8 @@
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
     ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT %ptr(p0)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[PTRTOINT]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: $w0 = COPY %ext(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir
@@ -96,8 +96,8 @@
     ; CHECK-LABEL: name: test_urem_1
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sadde.mir
@@ -11,12 +11,11 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
-    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND1]]
+    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDE1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64)
     ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64)
@@ -47,14 +46,12 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
-    ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE3]], [[C]]
-    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND2]]
+    ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]]
+    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE3]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64)
     ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64)
@@ -89,8 +86,8 @@
     ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
     ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
     ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
     ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 8
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddo.mir
@@ -11,9 +11,7 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[AND]]
+    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY1]], [[COPY3]], [[UADDO1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[SADDE]](s64)
@@ -42,11 +40,8 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]]
-    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]]
-    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[AND1]]
+    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]]
+    ; CHECK-NEXT: [[SADDE:%[0-9]+]]:_(s64), [[SADDE1:%[0-9]+]]:_(s32) = G_SADDE [[COPY2]], [[COPY3]], [[UADDE1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SADDE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
@@ -18,9 +18,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]]
-    ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]]
+    ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]]
     ; CHECK-NEXT: $w0 = COPY %saddsat(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
@@ -46,9 +44,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SADDO1]], [[C2]]
-    ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SADDO]]
+    ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[SADDO1]](s32), [[ADD]], [[SADDO]]
     ; CHECK-NEXT: $x0 = COPY %saddsat(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %x:_(s64) = COPY $x0
@@ -80,9 +76,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -117,8 +111,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -153,9 +146,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -187,13 +178,10 @@
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]]
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64)
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64)
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
     ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD1]], [[ADD]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD1]], [[ADD]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy_1:_(s64) = COPY $x0
@@ -227,9 +215,7 @@
     ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
     ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]]
-    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
+    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]]
     ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]]
     ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32)
@@ -240,11 +226,10 @@
     ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
     ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
     ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64)
     ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[UADDO]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO2]], [[UADDO]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy_1:_(s128) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -125,20 +125,18 @@
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C3]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64)
     ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
     ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND1]], [[AND2]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
     ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s32) = COPY $w0
@@ -166,9 +164,7 @@
     ; CHECK-NEXT: %b:_(s32) = COPY $w1
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), %a(s32), %b
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[DEF]], [[DEF]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %a:_(s32) = COPY $w0
@@ -311,17 +307,17 @@
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>)
     ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]]
     ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>)
-    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>)
-    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT3]], [[ANYEXT4]]
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]]
     ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>)
-    ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT5]], [[ANYEXT6]]
+    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>)
+    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]]
     ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>)
-    ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>)
-    ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT7]], [[ANYEXT8]]
+    ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>)
+    ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]]
     ; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>)
     ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
     ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
@@ -359,22 +355,20 @@
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[C]](p0), [[C]](p0)
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[C]]
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
     ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
     ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[BUILD_VECTOR]](<2 x p0>)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[AND]], 1
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
     ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
     ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C1]](s64)
     ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
     ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND1]], [[AND2]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[PTRTOINT1]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
     ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[OR]](<2 x s64>)
     ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
@@ -7,8 +7,8 @@
     ; CHECK-LABEL: name: test_shift
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
     ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
@@ -109,13 +109,9 @@
     ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s64)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]]
     ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s64)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[SHL]], [[C1]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[OR]], [[SHL2]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
-    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[UV1]], [[SELECT1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[SHL]], [[C1]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[SHL2]]
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV1]], [[SELECT1]]
     ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64)
     ; CHECK-NEXT: $q0 = COPY [[MV]](s128)
     %0:_(s128) = COPY $q0
@@ -145,13 +141,9 @@
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s64)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]]
     ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s64)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR2]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C2]]
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR2]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]]
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[LSHR]], [[C1]]
     ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
     ; CHECK-NEXT: $q0 = COPY [[MV]](s128)
     %0:_(s128) = COPY $q0
@@ -183,13 +175,9 @@
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
     ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s64)
     ; CHECK-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s64)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[ASHR2]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]]
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UV]], [[SELECT]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[ASHR]], [[ASHR1]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[ASHR2]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[UV]], [[SELECT]]
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ASHR]], [[ASHR1]]
     ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
     ; CHECK-NEXT: $q0 = COPY [[MV]](s128)
     %0:_(s128) = COPY $q0
@@ -236,7 +224,9 @@
     liveins: $w0
     ; CHECK-LABEL: name: shl_cimm_32
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64)
     ; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
@@ -255,7 +245,9 @@
     liveins: $w0
     ; CHECK-LABEL: name: lshr_cimm_32
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
@@ -274,7 +266,9 @@
     liveins: $w0
     ; CHECK-LABEL: name: ashr_cimm_32
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: $w0 = COPY [[ASHR]](s32)
@@ -496,11 +490,8 @@
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB1]](s64)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
     ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB]](s64)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C3]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C3]]
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR]], [[LSHR1]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT]]
     ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
     ; CHECK-NEXT: $w0 = COPY %d2(s32)
     %0:_(s64) = COPY $x0
@@ -529,7 +520,6 @@
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C2]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s64), [[C1]]
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[C3]]
     ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[COPY]]
     ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[C3]]
@@ -538,14 +528,11 @@
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV1]], [[SUB3]](s64)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
     ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[MV1]], [[SUB2]](s64)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP2]], [[C4]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[OR]], [[LSHR1]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP3]], [[C4]]
-    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[MV]], [[SELECT]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s32), [[OR]], [[LSHR1]]
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s32), [[MV]], [[SELECT]]
     ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[SUB1]](s64), [[C3]]
     ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[SUB1]](s64)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ICMP4]], [[C4]]
-    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[AND2]](s32), [[SHL1]], [[C1]]
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s32), [[SHL1]], [[C1]]
     ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SELECT1]], [[SELECT2]]
     ; CHECK-NEXT: [[SUB4:%[0-9]+]]:_(s64) = G_SUB [[SUB]], [[C3]]
     ; CHECK-NEXT: [[SUB5:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[SUB]]
@@ -555,14 +542,10 @@
     ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[C1]], [[SUB5]](s64)
     ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL2]]
     ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[C1]], [[SUB4]](s64)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP5]], [[C4]]
-    ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[AND3]](s32), [[OR2]], [[LSHR3]]
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ICMP6]], [[C4]]
-    ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV2]], [[SELECT3]]
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C4]]
-    ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[AND5]](s32), [[OR1]], [[SELECT4]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ICMP1]], [[C4]]
-    ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[AND6]](s32), [[MV]], [[SELECT5]]
+    ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s32), [[OR2]], [[LSHR3]]
+    ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s32), [[MV2]], [[SELECT3]]
+    ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[OR1]], [[SELECT4]]
+    ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s32), [[MV]], [[SELECT5]]
     ; CHECK-NEXT: %d1:_(s32), %d2:_(s32) = G_UNMERGE_VALUES [[SELECT6]](s64)
     ; CHECK-NEXT: $w0 = COPY %d2(s32)
     %0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir
@@ -12,16 +12,16 @@
     ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64)
     ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[INTTOPTR]](p0)
     ; CHECK-NEXT: $x0 = COPY [[PTRTOINT]](s64)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]]
     ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: bb.1:
     ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
     ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC4]], [[C1]]
     ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[TRUNC2]], [[TRUNC3]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
@@ -101,7 +101,9 @@
     liveins: $x0, $x1
     ; CHECK-LABEL: name: bitcast128
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
     ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[MV]](s128)
@@ -122,7 +124,9 @@
     liveins: $x0
     ; CHECK-LABEL: name: testExtOfCopyOfTrunc
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -140,7 +144,9 @@
     liveins: $x0
     ; CHECK-LABEL: name: testExtOf2CopyOfTrunc
-    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssube.mir
@@ -11,12 +11,11 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
-    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND1]]
+    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBE1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64)
     ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64)
@@ -47,14 +46,12 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
     ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
-    ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]]
-    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND2]]
+    ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]]
+    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE3]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64)
     ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64)
@@ -89,8 +86,8 @@
     ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 8
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
     ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 8
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]]
     ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
     ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 8
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubo.mir
@@ -11,9 +11,7 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
-    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[AND]]
+    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY1]], [[COPY3]], [[USUBO1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[SSUBE]](s64)
@@ -42,11 +40,8 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
-    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
-    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[AND1]]
+    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]]
+    ; CHECK-NEXT: [[SSUBE:%[0-9]+]]:_(s64), [[SSUBE1:%[0-9]+]]:_(s32) = G_SSUBE [[COPY2]], [[COPY3]], [[USUBE1]]
     ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[SSUBE1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
@@ -18,9 +18,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]]
-    ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]]
+    ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]]
     ; CHECK-NEXT: $w0 = COPY %ssubsat(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
@@ -46,9 +44,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SSUBO1]], [[C2]]
-    ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SSUBO]]
+    ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s32), [[ADD]], [[SSUBO]]
     ; CHECK-NEXT: $x0 = COPY %ssubsat(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %x:_(s64) = COPY $x0
@@ -80,9 +76,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -117,8 +111,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -153,9 +146,7 @@
     ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy_1:_(s32) = COPY $w0
@@ -187,13 +178,10 @@
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]]
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SEXT_INREG2]](s64)
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64)
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s64)
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[ADD]], [[SUB]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[ADD]], [[SUB]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy_1:_(s64) = COPY $x0
@@ -227,9 +215,7 @@
     ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24
     ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
     ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]]
-    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
+    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]]
     ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]]
     ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32)
@@ -240,11 +226,10 @@
     ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
     ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
     ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
-    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C2]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
+    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[C1]](s64)
     ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[ASHR]], [[C]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[USUBO]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[UADDO]], [[USUBO]]
     ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %copy_1:_(s128) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
@@ -11,9 +11,7 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
-    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]]
+    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]]
     ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
     %0:_(s64) = COPY $x0
@@ -38,11 +36,8 @@
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
     ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]]
-    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]]
-    ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]]
+    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]]
+    ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]]
     ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64)
     ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64)
     ; CHECK-NEXT: $x2 = COPY [[USUBE2]](s64)
@@ -136,8 +131,8 @@
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[sub]](<2 x s32>)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: $d0 = COPY [[SUB]](<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
@@ -163,15 +158,15 @@
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
     ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
     ; CHECK-NEXT: RET_ReallyLR implicit $b0
     %1:_(s8) = COPY $b0
     %2:_(s8) = COPY $b1
@@ -200,8 +195,8 @@
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[sub]](<4 x s16>)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: $d0 = COPY [[SUB]](<4 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<4 x s16>) = COPY $d0
     %1:_(<4 x s16>) = COPY $d1
@@ -230,8 +225,8 @@
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x s8>) = COPY $d3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY]](<8 x s8>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY2]](<8 x s8>), [[COPY3]]
-    ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[sub]](<8 x s8>)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: $d0 = COPY [[SUB]](<8 x s8>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<8 x s8>) = COPY $d0
     %1:_(<8 x s8>) = COPY $d1
@@ -260,8 +255,8 @@
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s8>) = COPY $q3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY2]](<16 x s8>), [[COPY3]]
-    ; CHECK-NEXT: [[sub:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $q0 = COPY [[sub]](<16 x s8>)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<16 x s8>) = G_SUB [[ICMP]], [[ICMP1]]
+    ; CHECK-NEXT: $q0 = COPY [[SUB]](<16 x s8>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<16 x s8>) = COPY $q0
     %1:_(<16 x s8>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadd-sat.mir
@@ -18,9 +18,7 @@
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]]
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]]
-    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[UADDO]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]]
     ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %0:_(s32) = COPY $w0
@@ -48,9
+46,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[UADDO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[UADDO1]](s32), [[C]], [[UADDO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -83,9 +79,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -121,9 +115,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -162,9 +154,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[COPY2]], [[ADD]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY2]], [[ADD]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uadde.mir @@ -11,12 +11,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT 
[[UADDE3]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -47,14 +46,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UADDE3]], [[C]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s64), [[UADDE5:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE5]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDE]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE2]](s64) @@ -85,13 +82,13 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[AND]], [[AND1]], [[AND2]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UADDE]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-uaddo.mir @@ -11,9 +11,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], [[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY3]], [[UADDO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE1]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) @@ -42,11 +40,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UADDO1]], 
[[C]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UADDE1]], [[C]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[COPY1]], [[COPY2]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s32) = G_UADDE [[COPY2]], [[COPY3]], [[UADDE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[UADDE3]](s32) ; CHECK-NEXT: $x0 = COPY [[UADDO]](s64) ; CHECK-NEXT: $x1 = COPY [[UADDE]](s64) @@ -74,8 +69,8 @@ ; CHECK-LABEL: name: test_scalar_uaddo_small ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usub-sat.mir @@ -18,9 +18,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[C]], [[USUBO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -48,9 +46,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND]](s32), [[C]], [[USUBO]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[USUBO1]](s32), [[C]], [[USUBO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(s64) = COPY $x0 @@ -83,9 +79,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -121,9 +115,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 @@ -159,9 +151,7 @@ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C2]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND3]](s32), [[C1]], [[SUB]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C1]], [[SUB]] ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %2:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usube.mir @@ -11,12 +11,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -47,14 +46,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[USUBE3]], [[C]] - ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND2]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBE1]] + ; CHECK-NEXT: [[USUBE4:%[0-9]+]]:_(s64), [[USUBE5:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE3]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE5]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBE]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE2]](s64) @@ -85,13 +82,13 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[AND]], [[AND1]], [[AND2]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[USUBE]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-usubo.mir @@ -11,9 +11,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[AND]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY3]], [[USUBO1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE1]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) @@ -42,11 +40,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s32) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[USUBO1]], [[C]] - ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[AND]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[USUBE1]], [[C]] - ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[AND1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[COPY1]], [[COPY2]], [[USUBO1]] + ; CHECK-NEXT: [[USUBE2:%[0-9]+]]:_(s64), [[USUBE3:%[0-9]+]]:_(s32) = G_USUBE [[COPY2]], [[COPY3]], [[USUBE1]] ; CHECK-NEXT: %carry_out_ext:_(s64) = G_ANYEXT [[USUBE3]](s32) ; CHECK-NEXT: $x0 = COPY [[USUBO]](s64) ; CHECK-NEXT: $x1 = COPY [[USUBE]](s64) @@ -74,8 +69,8 @@ ; CHECK-LABEL: name: test_scalar_usubo_small ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir @@ -17,8 +17,8 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI %33(s16), %bb.2, [[DEF]](s16), %bb.0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 46 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir @@ -10,8 +10,7 @@ ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ogt), [[COPY]](s32), [[COPY1]] ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[FCMP]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] - ; CHECK: $w0 = COPY [[AND1]](s32) + ; CHECK: $w0 = COPY [[AND]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s1) = G_FCMP floatpred(ogt), %0(s32), %1 diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll --- a/llvm/test/CodeGen/AArch64/zext.ll +++ b/llvm/test/CodeGen/AArch64/zext.ll @@ -1200,7 +1200,7 @@ ; ; CHECK-GI-LABEL: zext_v16i10_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov s7, w0 +; CHECK-GI-NEXT: fmov s16, w0 ; CHECK-GI-NEXT: fmov s17, w2 ; CHECK-GI-NEXT: ldr s0, [sp] ; CHECK-GI-NEXT: fmov s18, w4 @@ -1211,33 +1211,33 @@ ; CHECK-GI-NEXT: ldr s4, [sp, #32] ; CHECK-GI-NEXT: ldr s5, [sp, #40] ; CHECK-GI-NEXT: ldr s6, [sp, #48] -; CHECK-GI-NEXT: ldr s16, [sp, #56] -; CHECK-GI-NEXT: mov v7.s[1], w1 +; CHECK-GI-NEXT: ldr s7, [sp, #56] +; CHECK-GI-NEXT: mov v16.s[1], w1 ; CHECK-GI-NEXT: mov v17.s[1], w3 ; CHECK-GI-NEXT: mov v18.s[1], w5 ; CHECK-GI-NEXT: mov v19.s[1], w7 ; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] ; CHECK-GI-NEXT: mov v2.s[1], v3.s[0] ; CHECK-GI-NEXT: mov v4.s[1], v5.s[0] -; CHECK-GI-NEXT: mov v6.s[1], v16.s[0] +; CHECK-GI-NEXT: mov v6.s[1], v7.s[0] ; CHECK-GI-NEXT: adrp x8, .LCPI54_0 -; CHECK-GI-NEXT: ldr q16, [x8, :lo12:.LCPI54_0] -; CHECK-GI-NEXT: ushll v1.2d, v7.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v16.2s, #0 ; CHECK-GI-NEXT: ushll v3.2d, v17.2s, #0 ; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0 ; CHECK-GI-NEXT: ushll v7.2d, v19.2s, #0 -; CHECK-GI-NEXT: ushll v17.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0 ; CHECK-GI-NEXT: ushll v18.2d, v2.2s, #0 ; CHECK-GI-NEXT: ushll v19.2d, v4.2s, #0 ; CHECK-GI-NEXT: ushll v20.2d, v6.2s, #0 -; CHECK-GI-NEXT: and v0.16b, v1.16b, v16.16b -; CHECK-GI-NEXT: and v1.16b, v3.16b, v16.16b -; CHECK-GI-NEXT: and v2.16b, v5.16b, v16.16b -; CHECK-GI-NEXT: and v3.16b, v7.16b, v16.16b -; CHECK-GI-NEXT: and v4.16b, v17.16b, v16.16b -; CHECK-GI-NEXT: and v5.16b, v18.16b, v16.16b -; CHECK-GI-NEXT: and v6.16b, v19.16b, v16.16b -; CHECK-GI-NEXT: and v7.16b, v20.16b, v16.16b +; CHECK-GI-NEXT: ldr q17, [x8, :lo12:.LCPI54_0] +; CHECK-GI-NEXT: and v0.16b, v1.16b, v17.16b +; CHECK-GI-NEXT: and v1.16b, v3.16b, v17.16b +; CHECK-GI-NEXT: and v2.16b, v5.16b, v17.16b +; CHECK-GI-NEXT: and v3.16b, v7.16b, v17.16b +; CHECK-GI-NEXT: and v4.16b, v16.16b, v17.16b +; CHECK-GI-NEXT: and v5.16b, v18.16b, v17.16b +; CHECK-GI-NEXT: and v6.16b, v19.16b, v17.16b +; 
CHECK-GI-NEXT: and v7.16b, v20.16b, v17.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i10> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir @@ -50,8 +50,8 @@ ; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone ; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone ; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]] ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -139,8 +139,7 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -549,11 +548,9 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[LSHR]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND1]](s32), [[LSHR1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64), implicit [[MV1]](s64) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -1305,8 +1302,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] @@ -1453,8 +1450,8 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -31,10 +31,10 @@ ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT1]], [[C]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) @@ -55,13 +55,13 @@ ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -134,8 +134,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -158,8 +158,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 @@ -181,8 +181,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -209,12 +209,12 @@ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -241,12 +241,12 @@ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV3]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT]], 1 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ANYEXT1]], 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -269,12 +269,12 @@ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 8 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) - ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -1327,9 +1327,9 @@ ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 ; GFX9-NEXT: v_not_b32_e32 v2, v2 -; GFX9-NEXT: s_mov_b32 s5, 1 +; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v10 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 @@ -1338,7 +1338,7 @@ ; GFX9-NEXT: v_not_b32_e32 v5, v5 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xff ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4 @@ -1360,9 +1360,9 @@ ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 +; GFX9-NEXT: v_and_or_b32 v1, v2, s5, v1 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -1807,48 +1807,47 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshl_v2i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_or_b32 s0, s9, s0 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_or_b32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff ; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; 
GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_or_b32 s1, s1, s6 ; GFX6-NEXT: s_lshr_b32 s6, s2, 16 ; GFX6-NEXT: s_lshr_b32 s7, s2, 24 ; GFX6-NEXT: s_and_b32 s9, s2, 0xff ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff ; GFX6-NEXT: s_or_b32 s2, s9, s2 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s8, s3, 8 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX6-NEXT: s_or_b32 s2, s2, s6 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_or_b32 s3, s7, s3 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 @@ -1858,78 +1857,77 @@ ; GFX6-NEXT: s_lshr_b32 s7, s4, 24 ; GFX6-NEXT: s_and_b32 s9, s4, 0xff ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX6-NEXT: s_or_b32 s4, s9, s4 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s6 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s8, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: s_or_b32 s5, s7, s5 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX6-NEXT: s_or_b32 s5, s5, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 ; GFX6-NEXT: s_lshr_b32 s0, s2, 1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; 
GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1 +; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshl_v2i24: @@ -1942,9 +1940,7 @@ ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 @@ -1952,24 +1948,24 @@ ; GFX8-NEXT: s_or_b32 s0, s0, s6 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX8-NEXT: s_or_b32 s1, s8, s1 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s1, s1, s6 ; GFX8-NEXT: s_lshr_b32 s6, s2, 8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff ; GFX8-NEXT: s_lshr_b32 s7, s2, 16 ; GFX8-NEXT: s_lshr_b32 s8, s2, 24 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s2, s2, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; 
GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX8-NEXT: s_lshr_b32 s9, s3, 8 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 @@ -1977,11 +1973,12 @@ ; GFX8-NEXT: s_or_b32 s2, s2, s6 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX8-NEXT: s_or_b32 s3, s8, s3 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_or_b32 s3, s3, s6 ; GFX8-NEXT: s_lshr_b32 s6, s4, 8 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff @@ -1989,212 +1986,207 @@ ; GFX8-NEXT: s_lshr_b32 s8, s4, 24 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_or_b32 s4, s4, s6 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 ; GFX8-NEXT: s_or_b32 s4, s4, s6 -; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s5, 8 -; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff -; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: s_or_b32 s5, s8, s5 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX8-NEXT: s_or_b32 s5, s5, s6 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; 
GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1 +; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 16 -; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 +; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshl_v2i24: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_lshr_b32 s9, s0, 16 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff -; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-NEXT: s_or_b32 s0, s0, s7 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: 
s_and_b32 s7, 0xffff, s7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s1, s1, s7 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_lshr_b32 s9, s2, 16 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s2, s2, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX9-NEXT: s_or_b32 s2, s2, s7 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX9-NEXT: s_or_b32 s3, s10, s3 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s3, s3, s7 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff -; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: s_or_b32 s4, s4, s7 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 -; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX9-NEXT: s_or_b32 s4, s4, s7 -; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff -; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff ; GFX9-NEXT: s_or_b32 s5, s10, s5 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s5, s5, s7 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 -; GFX9-NEXT: s_lshr_b32 s2, s2, 1 -; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: 
v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 -; GFX9-NEXT: s_lshr_b32 s0, s3, 1 +; GFX9-NEXT: s_lshr_b32 s2, s2, 1 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 +; GFX9-NEXT: s_lshr_b32 s0, s3, 1 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2 +; GFX9-NEXT: v_lshl_or_b32 v0, s1, v0, v2 ; GFX9-NEXT: s_mov_b32 s8, 16 ; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 -; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshl_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff @@ -2202,244 +2194,251 @@ ; GFX10-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX10-NEXT: s_lshr_b32 s7, s4, 8 ; GFX10-NEXT: s_lshr_b32 s10, s4, 16 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff ; GFX10-NEXT: s_lshr_b32 s11, s4, 24 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: s_lshl_b32 s7, s7, 8 ; GFX10-NEXT: s_lshr_b32 s12, s5, 8 +; 
GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: s_or_b32 s4, s4, s7 -; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 ; GFX10-NEXT: s_and_b32 s7, s10, 0xff ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX10-NEXT: s_or_b32 s4, s4, s7 ; GFX10-NEXT: s_and_b32 s7, s12, 0xff +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX10-NEXT: s_or_b32 s5, s11, s5 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 -; GFX10-NEXT: s_or_b32 s5, s5, s7 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8 +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff -; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX10-NEXT: s_lshl_b32 s1, s1, 8 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff +; GFX10-NEXT: s_lshl_b32 s1, s1, 8 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX10-NEXT: s_or_b32 s1, s8, s1 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8 ; GFX10-NEXT: s_lshr_b32 s9, s2, 16 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff -; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: s_lshr_b32 s10, s2, 24 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX10-NEXT: s_or_b32 s2, s2, s8 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 ; GFX10-NEXT: s_lshr_b32 s4, s3, 8 ; GFX10-NEXT: s_and_b32 s5, s9, 0xff ; GFX10-NEXT: s_and_b32 s3, s3, 0xff -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16 ; GFX10-NEXT: s_or_b32 s3, s10, s3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_or_b32 s2, s2, s5 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_lshr_b32 s2, s2, 1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: s_lshr_b32 s2, s2, 1 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16 ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: s_or_b32 s1, s1, s7 ; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX10-NEXT: s_lshr_b32 s2, s3, 1 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshl_or_b32 v0, s1, v0, v3 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_lshr_b32 s10, s4, 24 +; GFX11-NEXT: s_and_b32 s7, s7, 0xff ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_and_b32 s6, s7, 0xff +; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshl_b32 s6, 
s6, 16 -; GFX11-NEXT: s_lshr_b32 s7, s4, 16 +; GFX11-NEXT: s_lshr_b32 s9, s4, 16 ; GFX11-NEXT: s_or_b32 s0, s0, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_lshr_b32 s6, s4, 8 -; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshr_b32 s10, s4, 24 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 -; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_or_b32 s4, s4, s6 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 +; GFX11-NEXT: s_or_b32 s4, s4, s6 +; GFX11-NEXT: s_and_b32 s6, s9, 0xff +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff ; GFX11-NEXT: s_or_b32 s4, s4, s6 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8 ; GFX11-NEXT: s_and_b32 s6, s11, 0xff +; GFX11-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX11-NEXT: s_or_b32 s5, s10, s5 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_lshr_b32 s9, s1, 8 -; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: s_lshr_b32 s7, s1, 8 ; GFX11-NEXT: s_or_b32 s5, s5, s6 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_and_b32 s7, s9, 0xff -; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v3 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 +; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_lshr_b32 s7, s2, 8 -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 16 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff -; GFX11-NEXT: s_lshr_b32 s9, s3, 8 -; GFX11-NEXT: s_lshl_b32 s7, s7, 8 -; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX11-NEXT: s_lshr_b32 s4, s2, 24 +; GFX11-NEXT: s_lshr_b32 s9, s2, 24 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_lshl_b32 s7, s7, 8 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 ; GFX11-NEXT: s_or_b32 s2, s2, s7 -; GFX11-NEXT: s_or_b32 s3, s4, s3 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX11-NEXT: s_and_b32 s5, s8, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: 
v_subrev_nc_u32_e32 v2, 24, v1 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff ; GFX11-NEXT: s_or_b32 s2, s2, s5 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_lshr_b32 s2, s2, 1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX11-NEXT: s_lshr_b32 s4, s3, 8 +; GFX11-NEXT: s_and_b32 s3, s3, 0xff +; GFX11-NEXT: s_and_b32 s4, s4, 0xff +; GFX11-NEXT: s_lshl_b32 s3, s3, 8 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_or_b32 s3, s9, s3 +; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_or_b32 s3, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: s_lshr_b32 s3, s3, 1 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: s_lshl_b32 s6, s6, 16 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_and_b32 v0, 0xffffff, v0 -; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX11-NEXT: s_and_b32 s2, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s3, s4, 16 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX11-NEXT: s_or_b32 s2, s2, s3 -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX11-NEXT: s_lshr_b32 s0, s2, 1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v2, v3, s0 -; GFX11-NEXT: s_or_b32 s0, s1, s6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 +; GFX11-NEXT: s_lshl_b32 s2, s6, 16 +; GFX11-NEXT: v_lshrrev_b32_e64 v3, v3, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v1, v2 
+; GFX11-NEXT: s_or_b32 s0, s1, s2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 +; GFX11-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2455,37 +2454,35 @@ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; 
GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX6-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 @@ -2509,37 +2506,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_lo_u32 v7, v8, v7 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_mul_hi_u32 v7, v8, v7 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 -; GFX8-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 23, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v6 -; GFX8-NEXT: v_mul_lo_u32 v6, v7, 24 +; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 @@ -2563,37 +2558,35 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: 
v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_mul_hi_u32 v6, v5, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 @@ -2614,29 +2607,27 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, 
v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2665,64 +2656,63 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX11-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX11-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_mul_f32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | 
instid1(VALU_DEP_2) ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 -; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -1371,48 +1371,48 @@ ; GFX10-LABEL: v_fshr_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_not_b32_e32 v5, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_not_b32_e32 v8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX10-NEXT: v_not_b32_e32 v12, v7 -; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 -; GFX10-NEXT: v_not_b32_e32 v13, v10 +; GFX10-NEXT: v_not_b32_e32 v10, v7 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 +; GFX10-NEXT: v_lshlrev_b16 v0, v5, v0 +; GFX10-NEXT: v_and_b32_e32 v5, 7, v10 +; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 +; GFX10-NEXT: v_not_b32_e32 v13, v11 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b16 v3, v12, v3 -; GFX10-NEXT: v_not_b32_e32 v12, v11 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 -; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1 +; 
GFX10-NEXT: v_and_b32_e32 v10, 0xff, v1 +; GFX10-NEXT: v_lshlrev_b16 v3, v5, v3 +; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v8 +; GFX10-NEXT: v_not_b32_e32 v8, v12 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 -; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX10-NEXT: v_lshrrev_b16 v6, v7, v6 +; GFX10-NEXT: v_lshrrev_b16 v5, v7, v5 ; GFX10-NEXT: v_lshlrev_b16 v4, v13, v4 -; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 -; GFX10-NEXT: v_lshlrev_b16 v5, v12, v5 -; GFX10-NEXT: v_lshrrev_b16 v7, v11, v9 -; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 -; GFX10-NEXT: v_mov_b32_e32 v6, 8 +; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1 +; GFX10-NEXT: v_lshlrev_b16 v6, v8, v6 +; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v5 +; GFX10-NEXT: v_mov_b32_e32 v5, 8 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 -; GFX10-NEXT: v_or_b32_e32 v4, v5, v7 +; GFX10-NEXT: v_or_b32_e32 v4, v6, v7 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2 @@ -1820,14 +1820,13 @@ ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_or_b32 s0, s9, s0 ; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_and_b32 s7, s8, 0xff @@ -1835,19 +1834,19 @@ ; GFX6-NEXT: s_lshr_b32 s9, s2, 24 ; GFX6-NEXT: s_and_b32 s11, s2, 0xff ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff ; GFX6-NEXT: s_or_b32 s2, s11, s2 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s10, s3, 8 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX6-NEXT: s_or_b32 s2, s2, s8 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_or_b32 s3, s9, s3 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 @@ -1857,103 +1856,100 @@ ; GFX6-NEXT: s_lshr_b32 s9, s4, 24 ; GFX6-NEXT: s_and_b32 s11, s4, 0xff ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; 
GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX6-NEXT: s_or_b32 s4, s11, s4 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_lshr_b32 s10, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: s_or_b32 s5, s9, s5 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX6-NEXT: s_or_b32 s5, s5, s8 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 ; GFX6-NEXT: s_lshl_b32 s4, s6, 17 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 ; GFX6-NEXT: s_or_b32 s0, s4, s0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_lshr_b32_e32 v1, s2, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 ; GFX6-NEXT: s_lshl_b32 s0, s7, 17 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, 
v2 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 -; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshr_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshr_v2i24: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff ; GFX8-NEXT: s_or_b32 s1, s8, s1 ; GFX8-NEXT: s_lshr_b32 s8, s2, 8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s6, s6, 8 @@ -1965,11 +1961,11 @@ ; GFX8-NEXT: s_lshr_b32 s10, s2, 24 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 +; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX8-NEXT: s_lshr_b32 s11, s3, 8 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 @@ -1977,11 +1973,12 @@ ; GFX8-NEXT: s_or_b32 s2, s2, s8 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX8-NEXT: s_or_b32 s3, s10, s3 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_or_b32 s3, s3, s8 ; GFX8-NEXT: s_lshr_b32 s8, s4, 8 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff @@ -1989,101 +1986,95 @@ ; GFX8-NEXT: s_lshr_b32 s10, s4, 24 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff ; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_or_b32 s4, s4, s8 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 
s8, s8, 16 ; GFX8-NEXT: s_or_b32 s4, s4, s8 -; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX8-NEXT: s_lshr_b32 s11, s5, 8 -; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff -; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: s_or_b32 s5, s10, s5 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX8-NEXT: s_or_b32 s5, s5, s8 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX8-NEXT: s_lshl_b32 s4, s6, 17 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 ; GFX8-NEXT: s_or_b32 s0, s4, s0 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 ; GFX8-NEXT: s_lshl_b32 s0, s7, 17 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 ; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: 
v_lshrrev_b32_e64 v0, v0, s3 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 16 -; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 +; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX8-NEXT: v_readfirstlane_b32 s0, v0 -; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshr_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff ; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: s_lshr_b32 s10, s2, 8 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s7, s7, 8 @@ -2095,12 +2086,11 @@ ; GFX9-NEXT: s_lshr_b32 s12, s2, 24 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff -; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX9-NEXT: s_lshr_b32 s13, s3, 8 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 @@ -2108,101 +2098,103 @@ ; GFX9-NEXT: s_or_b32 s2, s2, s10 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffffe8 ; GFX9-NEXT: s_or_b32 s3, s12, s3 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: s_or_b32 s3, s3, s10 ; GFX9-NEXT: s_lshr_b32 s10, s4, 8 
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_and_b32 s10, s10, 0xff ; GFX9-NEXT: s_lshr_b32 s11, s4, 16 ; GFX9-NEXT: s_lshr_b32 s12, s4, 24 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff ; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: s_or_b32 s4, s4, s10 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 -; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_lo_u32 v2, v0, v2 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 ; GFX9-NEXT: s_or_b32 s4, s4, s10 -; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: s_lshr_b32 s13, s5, 8 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff -; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff ; GFX9-NEXT: s_or_b32 s5, s12, s5 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16 -; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX9-NEXT: s_or_b32 s5, s5, s10 -; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 -; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: s_or_b32 s5, s5, s10 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 ; GFX9-NEXT: s_lshl_b32 s4, s7, 17 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 ; GFX9-NEXT: s_or_b32 s0, s4, s0 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v3 -; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 -; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX9-NEXT: s_and_b32 s9, 0xffff, s9 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 ; GFX9-NEXT: s_lshl_b32 s0, s9, 17 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 ; GFX9-NEXT: s_or_b32 s0, s0, s1 ; 
GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 +; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s3 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX9-NEXT: s_mov_b32 s8, 16 ; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 -; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshr_v2i24: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff ; GFX10-NEXT: s_or_b32 s1, s8, s1 @@ -2210,123 +2202,121 @@ ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff ; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s9, s4, 16 +; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: s_lshr_b32 s10, s4, 24 -; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshr_b32 s11, s5, 8 +; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s9, 0xff -; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX10-NEXT: s_lshr_b32 s11, s5, 8 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 ; GFX10-NEXT: s_or_b32 s4, s4, s8 ; GFX10-NEXT: s_and_b32 s8, s11, 0xff -; 
GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: s_or_b32 s5, s10, s5 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX10-NEXT: s_or_b32 s5, s5, s8 ; GFX10-NEXT: s_lshr_b32 s8, s2, 16 -; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX10-NEXT: s_and_b32 s9, s9, 0xff ; GFX10-NEXT: s_lshr_b32 s10, s2, 24 -; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_lshr_b32 s11, s3, 8 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: s_lshl_b32 s9, s9, 8 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff -; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff ; GFX10-NEXT: s_or_b32 s2, s2, s9 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_and_b32 s5, s11, 0xff +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: s_and_b32 s4, 0xffff, s8 ; GFX10-NEXT: s_or_b32 s3, s10, s3 -; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX10-NEXT: s_and_b32 s5, s11, 0xff ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 +; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 -; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: s_or_b32 s2, s2, s4 +; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_or_b32 s3, s3, s5 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: s_lshl_b32 s4, s6, 17 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX10-NEXT: s_or_b32 s0, s4, s0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: 
v_sub_nc_u32_e32 v2, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 ; GFX10-NEXT: s_lshl_b32 s2, s7, 17 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v3, v1 ; GFX10-NEXT: s_or_b32 s0, s2, s1 -; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: s_mov_b32 s0, 16 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 -; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v1, 0xff, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_readfirstlane_b32 s1, v1 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 +; GFX10-NEXT: v_readfirstlane_b32 s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshr_v2i24: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff -; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24 +; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff ; GFX11-NEXT: s_lshl_b32 s6, s6, 8 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8 @@ -2334,122 +2324,124 @@ ; GFX11-NEXT: s_and_b32 s6, s7, 0xff ; GFX11-NEXT: s_and_b32 s7, s9, 0xff ; GFX11-NEXT: s_lshr_b32 s9, s4, 8 -; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 ; GFX11-NEXT: s_lshr_b32 s10, s4, 16 ; GFX11-NEXT: s_and_b32 s9, s9, 0xff +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s11, s4, 0xff -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_lshl_b32 s9, s9, 8 ; GFX11-NEXT: s_and_b32 s10, s10, 0xff ; GFX11-NEXT: s_or_b32 s9, s11, s9 -; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 -; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 +; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: s_and_b32 s10, 0xffff, s10 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s9 ; GFX11-NEXT: s_lshl_b32 s10, s10, 16 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8 -; GFX11-NEXT: s_or_b32 s9, s9, s10 +; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v1 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff -; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX11-NEXT: s_lshr_b32 s4, s4, 24 +; GFX11-NEXT: s_or_b32 s9, s9, s10 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8 ; GFX11-NEXT: s_and_b32 s10, s11, 0xff ; 
GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s10 +; GFX11-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16 -; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-NEXT: s_lshr_b32 s10, s2, 16 -; GFX11-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX11-NEXT: s_or_b32 s4, s4, s5 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8 -; GFX11-NEXT: s_lshr_b32 s5, s2, 24 -; GFX11-NEXT: s_or_b32 s1, s8, s1 +; GFX11-NEXT: s_lshr_b32 s10, s2, 16 ; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX11-NEXT: s_or_b32 s1, s8, s1 ; GFX11-NEXT: s_lshr_b32 s8, s2, 8 -; GFX11-NEXT: s_and_b32 s2, s2, 0xff +; GFX11-NEXT: s_lshr_b32 s5, s2, 24 +; GFX11-NEXT: v_mul_hi_u32 v1, s9, v1 +; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v0 ; GFX11-NEXT: s_and_b32 s8, s8, 0xff -; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_lshl_b32 s8, s8, 8 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX11-NEXT: s_or_b32 s2, s2, s8 ; GFX11-NEXT: s_and_b32 s8, s10, 0xff -; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v0, s9, v0 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX11-NEXT: v_mul_hi_u32 v2, v0, v3 +; GFX11-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX11-NEXT: s_lshl_b32 s8, s8, 16 +; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_or_b32 s2, s2, s8 +; GFX11-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-NEXT: v_sub_nc_u32_e32 v1, s9, v1 +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX11-NEXT: s_lshr_b32 s9, s3, 8 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff -; GFX11-NEXT: s_lshl_b32 s8, s8, 16 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8 -; GFX11-NEXT: s_or_b32 s2, s2, s8 -; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 -; GFX11-NEXT: s_and_b32 s4, s9, 0xff -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 +; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 ; GFX11-NEXT: s_or_b32 s3, s5, s3 -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX11-NEXT: s_lshl_b32 s4, s4, 16 ; GFX11-NEXT: s_lshl_b32 s5, s6, 17 -; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX11-NEXT: s_or_b32 s0, s5, s0 -; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 ; GFX11-NEXT: s_lshl_b32 s1, s1, 1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0 +; GFX11-NEXT: s_and_b32 s4, s9, 0xff +; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: s_lshl_b32 s4, s4, 16 +; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 -; GFX11-NEXT: s_or_b32 s2, s3, s4 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX11-NEXT: s_or_b32 s2, s3, s4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 +; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 ; GFX11-NEXT: s_lshl_b32 s0, s7, 17 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: s_or_b32 s0, s0, s1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 -; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 +; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 +; GFX11-NEXT: v_and_or_b32 v1, v1, 0xff, v2 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 -; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX11-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX11-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v4 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v0 ; GFX11-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> @@ -2465,42 +2457,40 @@ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX6-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GFX6-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7 +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX6-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2521,42 +2511,40 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 +; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 +; GFX8-NEXT: v_mul_lo_u32 v8, v6, v8 ; 
GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX8-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 -; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 -; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 +; GFX8-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7 +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX8-NEXT: v_mul_lo_u32 v6, v8, v7 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 ; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7 -; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 -; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 @@ -2577,42 +2565,40 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 -; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffffe8 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 +; GFX9-NEXT: v_mul_lo_u32 v8, v6, v8 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 -; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_mul_hi_u32 v9, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 +; 
GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_subrev_u32_e32 v7, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, v7, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc @@ -2630,31 +2616,29 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 -; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 -; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 +; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX10-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v9 +; GFX10-NEXT: v_mul_hi_u32 v7, v4, v7 +; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 -; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 @@ -2683,40 +2667,37 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 -; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1 -; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v6 +; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_mul_hi_u32 v9, v6, v9 +; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v9 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 -; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 +; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX11-NEXT: v_mul_hi_u32 v8, v7, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 -; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 +; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v8 +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX11-NEXT: v_mul_hi_u32 v7, v4, v7 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) @@ -2729,20 +2710,20 @@ ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -25,12 +25,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -63,12 +65,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -101,12 +105,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -139,12 +145,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; 
GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p3_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -177,12 +185,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_p5_from_4 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -216,12 +226,14 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_v2s16 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -260,12 +272,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -296,14 +310,6 @@ bb.0: liveins: $vgpr0 - ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits - ; GFX6: liveins: $vgpr0 - ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) - ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -312,6 +318,7 @@ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: 
[[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -353,12 +360,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -397,6 +406,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -405,6 +415,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -443,6 +454,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -451,6 +463,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -489,12 +502,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -533,6 +548,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -541,6 +557,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -579,6 +596,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -587,6 +605,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -625,6 +644,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -633,6 +653,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: 
$vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -671,6 +692,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -679,6 +701,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -717,6 +740,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -725,6 +749,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -763,6 +788,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -771,6 +797,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -809,6 +836,7 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; 
GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -817,6 +845,7 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} @@ -849,9 +878,11 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -878,9 +909,11 @@ ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) @@ -907,9 +940,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -937,10 +972,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), 
addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -969,9 +1006,11 @@ ; GFX6-LABEL: name: load_private_s32_from_fi ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_fi ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] @@ -998,9 +1037,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1030,9 +1071,11 @@ ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] @@ -1066,12 +1109,14 @@ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 
[[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) @@ -1102,10 +1147,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; ; GFX11-LABEL: name: load_private_s32_from_neg1 ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -18,6 +18,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX8-LABEL: name: test_add_s32 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; ; GFX9-LABEL: name: test_add_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_add_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -66,6 +69,7 @@ ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + 
; ; GFX9-LABEL: name: test_add_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -98,6 +102,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32) + ; ; GFX8-LABEL: name: test_add_s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -108,6 +113,7 @@ ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; ; GFX9-LABEL: name: test_add_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +158,7 @@ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} @@ -174,6 +181,7 @@ ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -208,6 +216,7 @@ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) ; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16) + ; ; GFX8-LABEL: name: test_add_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-NEXT: {{ $}} @@ -227,6 +236,7 @@ ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: S_ENDPGM 0, implicit [[ADD]](s16), implicit [[ADD1]](s16), implicit [[ADD2]](s16) + ; ; GFX9-LABEL: name: test_add_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-NEXT: {{ $}} @@ -315,6 +325,7 @@ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: test_add_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -355,6 +366,7 @@ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_add_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -389,6 +401,7 @@ ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: test_add_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -400,6 +413,7 @@ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX9-LABEL: name: test_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -432,6 +446,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[ADD]], [[C]]
 ; GFX6-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX8-LABEL: name: test_add_s7
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -440,10 +455,11 @@
 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]]
 ; GFX8-NEXT: $vgpr0 = COPY [[AND]](s32)
+ ;
 ; GFX9-LABEL: name: test_add_s7
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -452,8 +468,8 @@
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]]
 ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32)
 %0:_(s32) = COPY $vgpr0
@@ -478,6 +494,7 @@
 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
 ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32)
+ ;
 ; GFX8-LABEL: name: test_add_s24
 ; GFX8: liveins: $vgpr0, $vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -485,6 +502,7 @@
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
 ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32)
+ ;
 ; GFX9-LABEL: name: test_add_s24
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -517,6 +535,7 @@
 ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
 ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ;
 ; GFX8-LABEL: name: test_add_s33
 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX8-NEXT: {{ $}}
@@ -528,6 +547,7 @@
 ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
 ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ ;
 ; GFX9-LABEL: name: test_add_s33
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -566,6 +586,7 @@
 ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]]
 ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32)
 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+ ;
 ; GFX8-LABEL: name: test_add_s96
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX8-NEXT: {{ $}}
@@ -578,6 +599,7 @@
 ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]]
 ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32)
 ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
+ ;
 ; GFX9-LABEL: name: test_add_s96
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -469,10 +469,9 @@
 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
 ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
@@ -531,10 +530,9 @@
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
 ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -543,38 +541,34 @@
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL1]]
 ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
 ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
 ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
 ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL3]]
 ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND5]](<4 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>)
 ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
 ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
 ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
 ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
 ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]]
 ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
 ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
 ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
 ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -620,8 +620,8 @@
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]]
@@ -631,13 +631,12 @@
 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]]
 ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C4]](s16)
 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
 ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL2]]
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[OR2]](s16)
 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
@@ -685,8 +684,7 @@
 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -18,6 +18,7 @@
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s32_s32
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -25,6 +26,7 @@
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s32_s32
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -51,6 +53,7 @@
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; VI-LABEL: name: test_ashr_s64_s64
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -59,6 +62,7 @@
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s64_s64
 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9PLUS-NEXT: {{ $}}
@@ -85,6 +89,7 @@
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; VI-LABEL: name: test_ashr_s64_s32
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2
 ; VI-NEXT: {{ $}}
@@ -92,6 +97,7 @@
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[COPY1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s64_s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9PLUS-NEXT: {{ $}}
@@ -119,6 +125,7 @@
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; VI-LABEL: name: test_ashr_s64_s16
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2
 ; VI-NEXT: {{ $}}
@@ -128,6 +135,7 @@
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[AND]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s64_s16
 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2
 ; GFX9PLUS-NEXT: {{ $}}
@@ -158,6 +166,7 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[COPY1]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s16_s32
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -168,6 +177,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s16_s32
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -202,6 +212,7 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s16_s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -212,6 +223,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s16_s16
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -247,26 +259,28 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s16_i8
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s16_i8
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
 ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
 ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16)
 ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
@@ -296,13 +310,14 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_i8_i8
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
@@ -311,13 +326,14 @@
 ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_i8_i8
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
 ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8
 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
@@ -349,13 +365,14 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s7_s7
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
@@ -364,13 +381,14 @@
 ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s7_s7
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
 ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
 ; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
 ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
@@ -402,6 +420,7 @@
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s24_s24
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -412,6 +431,7 @@
 ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 24
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s24_s24
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -446,6 +466,7 @@
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; VI-LABEL: name: test_ashr_s32_s24
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -455,6 +476,7 @@
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[AND]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[ASHR]](s32)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s32_s24
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -488,6 +510,7 @@
 ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_ashr_v2s32_v2s32
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -499,6 +522,7 @@
 ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[UV3]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v2s32_v2s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9PLUS-NEXT: {{ $}}
@@ -534,6 +558,7 @@
 ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_ashr_v3s32_v3s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -546,6 +571,7 @@
 ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[UV5]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v3s32_v3s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX9PLUS-NEXT: {{ $}}
@@ -581,6 +607,7 @@
 ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_ashr_v2s64_v2s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -592,6 +619,7 @@
 ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV3]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v2s64_v2s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
 ; GFX9PLUS-NEXT: {{ $}}
@@ -629,6 +657,7 @@
 ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; VI-LABEL: name: test_ashr_v3s64_v3s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10
 ; VI-NEXT: {{ $}}
@@ -643,6 +672,7 @@
 ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10
 ; GFX9PLUS-NEXT: {{ $}}
@@ -686,15 +716,15 @@
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
 ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_ashr_v2s16_v2s16
 ; VI: liveins: $vgpr0, $vgpr1
 ; VI-NEXT: {{ $}}
@@ -717,6 +747,7 @@
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s16
 ; GFX9PLUS: liveins: $vgpr0, $vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -756,6 +787,7 @@
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_ashr_v2s16_v2s32
 ; VI: liveins: $vgpr0, $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -777,6 +809,7 @@
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v2s16_v2s32
 ; GFX9PLUS: liveins: $vgpr0, $vgpr0_vgpr1
 ; GFX9PLUS-NEXT: {{ $}}
@@ -823,34 +856,33 @@
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR1]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32)
+ ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32)
 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL]]
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
 ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL1]]
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
 ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]]
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
 ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; VI-LABEL: name: test_ashr_v3s16_v3s16
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -890,13 +922,13 @@
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
 ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
 ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
 ; GFX9PLUS-NEXT: {{ $}}
@@ -967,27 +999,26 @@
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[LSHR2]](s32)
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
 ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32)
 ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
- ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[AND3]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
+ ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[LSHR3]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
 ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]]
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
 ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_ashr_v4s16_v4s16
 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -1028,6 +1059,7 @@
 ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v4s16_v4s16
 ; GFX9PLUS: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1056,13 +1088,13 @@
 ; SI-NEXT: {{ $}}
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
- ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
- ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]]
- ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32)
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32)
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32)
@@ -1075,18 +1107,19 @@
 ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s128
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
- ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
- ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]]
- ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32)
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32)
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32)
@@ -1099,18 +1132,19 @@
 ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s128
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
 ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
- ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
- ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C1]]
- ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32)
@@ -1160,6 +1194,7 @@
 ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s132
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1184,6 +1219,7 @@
 ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s132
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1225,11 +1261,13 @@
 ; SI-NEXT: {{ $}}
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_0
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_0
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1261,6 +1299,7 @@
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_23
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1274,6 +1313,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_23
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1312,6 +1352,7 @@
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_31
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1325,6 +1366,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_31
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1362,6 +1404,7 @@
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1374,6 +1417,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1411,6 +1455,7 @@
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_33
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1424,6 +1469,7 @@
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[ASHR]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_33
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1459,6 +1505,7 @@
 ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; VI-LABEL: name: test_ashr_s128_s32_127
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; VI-NEXT: {{ $}}
@@ -1469,6 +1516,7 @@
 ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
 ; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[ASHR]](s64), [[ASHR1]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s128_s32_127
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1496,71 +1544,71 @@
 ; SI-NEXT: {{ $}}
 ; SI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
 ; SI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
- ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
- ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
- ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32)
 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32)
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32)
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32)
 ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32)
 ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]]
 ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]]
 ; SI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]]
 ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; SI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; SI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; SI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32)
 ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32)
 ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32)
 ; SI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]]
 ; SI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]]
- ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]]
+ ; SI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]]
 ; SI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]]
- ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]]
- ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]]
- ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; SI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]]
+ ; SI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]]
+ ; SI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]]
+ ; SI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32)
 ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32)
 ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]]
 ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32)
- ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]]
+ ; SI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]]
 ; SI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]]
 ; SI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]]
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]]
 ; SI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]]
 ; SI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
- ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
+ ; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
+ ; SI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
 ; SI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]]
- ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]]
- ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]]
- ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]]
+ ; SI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]]
+ ; SI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]]
+ ; SI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]]
+ ; SI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]]
 ; SI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32)
 ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32)
 ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32)
 ; SI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]]
- ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32)
+ ; SI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32)
 ; SI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32)
 ; SI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]]
 ; SI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]]
@@ -1576,76 +1624,77 @@
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64)
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256)
+ ;
 ; VI-LABEL: name: test_ashr_s256_s256
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
 ; VI-NEXT: {{ $}}
 ; VI-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
 ; VI-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
- ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
- ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
- ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; VI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; VI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32)
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32)
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32)
 ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32)
 ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]]
 ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]]
 ; VI-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]]
 ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; VI-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; VI-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; VI-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32)
 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32)
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]]
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32)
 ; VI-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]]
 ; VI-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]]
- ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]]
+ ; VI-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]]
 ; VI-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]]
- ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]]
- ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]]
- ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; VI-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]]
+ ; VI-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]]
+ ; VI-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]]
+ ; VI-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32)
 ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32)
 ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]]
 ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32)
- ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]]
+ ; VI-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]]
 ; VI-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]]
 ; VI-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]]
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]]
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]]
 ; VI-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
- ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
+ ; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
+ ; VI-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
 ; VI-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]]
- ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]]
- ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]]
- ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]]
+ ; VI-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]]
+ ; VI-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]]
+ ; VI-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]]
+ ; VI-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]]
 ; VI-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32)
 ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32)
 ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32)
 ; VI-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]]
- ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32)
+ ; VI-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32)
 ; VI-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32)
 ; VI-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]]
 ; VI-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]]
@@ -1661,76 +1710,77 @@
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64)
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s128), [[MV1]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV2]](s256)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s256_s256
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
 ; GFX9PLUS-NEXT: {{ $}}
 ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256)
- ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
- ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
- ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; GFX9PLUS-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C]]
+ ; GFX9PLUS-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX9PLUS-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+ ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
 ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9PLUS-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9PLUS-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9PLUS-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32)
 ; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
- ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
- ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32)
+ ; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+ ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32)
 ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32)
 ; GFX9PLUS-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]]
 ; GFX9PLUS-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]]
 ; GFX9PLUS-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]]
 ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128)
- ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C3]]
- ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY1]]
- ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C3]]
- ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C2]]
+ ; GFX9PLUS-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[COPY1]]
+ ; GFX9PLUS-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY1]](s32), [[C2]]
+ ; GFX9PLUS-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
 ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[COPY1]](s32)
 ; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32)
 ; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]]
- ; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32)
 ; GFX9PLUS-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]]
 ; GFX9PLUS-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]]
- ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]]
+ ; GFX9PLUS-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]]
 ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]]
- ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]]
- ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]]
- ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]]
+ ; GFX9PLUS-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]]
+ ; GFX9PLUS-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]]
+ ; GFX9PLUS-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C1]]
 ; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32)
 ; GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32)
 ; GFX9PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32)
 ; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]]
 ; GFX9PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32)
- ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]]
+ ; GFX9PLUS-NEXT: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]]
 ; GFX9PLUS-NEXT: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]]
 ; GFX9PLUS-NEXT: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]]
 ; GFX9PLUS-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]]
 ; GFX9PLUS-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]]
 ; GFX9PLUS-NEXT: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
- ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32)
 ; GFX9PLUS-NEXT: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128)
- ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]]
- ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]]
- ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]]
- ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]]
+ ; GFX9PLUS-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]]
+ ; GFX9PLUS-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]]
+ ; GFX9PLUS-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]]
+ ; GFX9PLUS-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C1]]
 ; GFX9PLUS-NEXT: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32)
 ; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32)
 ; GFX9PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32)
 ; GFX9PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]]
- ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32)
 ; GFX9PLUS-NEXT: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32)
 ; GFX9PLUS-NEXT: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]]
 ; GFX9PLUS-NEXT: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]]
@@ -1801,6 +1851,7 @@
 ; SI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
 ; VI-LABEL: name: test_ashr_v2s128_v2s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
 ; VI-NEXT: {{ $}}
@@ -1843,6 +1894,7 @@
 ; VI-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT4]](s64), [[SELECT5]](s64)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[MV]](s128), [[MV1]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_v2s128_v2s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5
 ; GFX9PLUS-NEXT: {{ $}}
@@ -1927,6 +1979,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; VI-LABEL: name: test_ashr_s65_s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; VI-NEXT: {{ $}}
@@ -1957,6 +2010,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s65_s32
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; GFX9PLUS-NEXT: {{ $}}
@@ -2030,6 +2084,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; VI-LABEL: name: test_ashr_s65_s32_constant8
 ; VI: liveins: $vgpr0_vgpr1_vgpr2
 ; VI-NEXT: {{ $}}
@@ -2059,6 +2114,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_constant8
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9PLUS-NEXT: {{ $}}
@@ -2133,6 +2189,7 @@
 ; SI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
 ; VI-LABEL: name: test_ashr_s65_s32_known_pow2
 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; VI-NEXT: {{ $}}
@@ -2164,6 +2221,7 @@
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
 ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s128)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96)
+ ;
 ; GFX9PLUS-LABEL: name: test_ashr_s65_s32_known_pow2
 ; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
 ; GFX9PLUS-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -440,8 +440,8 @@
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -529,8 +529,8 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
@@ -968,8 +968,8 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] @@ -1645,8 +1645,8 @@ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] @@ -2018,8 +2018,8 @@ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] @@ -2285,60 +2285,54 @@ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL3]] ; 
CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]] + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]] ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]] + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR11]](s16) ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL7]] ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL8]] + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR15]](s16) ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL9]] + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[LSHR12]], [[SHL9]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR16]](s16) ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C]](s32) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL10]] + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL10]] ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR17]](s16) ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL11]] + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[LSHR13]], [[SHL11]] ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) 
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir @@ -98,11 +98,8 @@ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[LSHR]] ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE1]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -13,14 +13,13 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX8-LABEL: name: bswap_s8 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -59,6 +58,7 @@ ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX8-LABEL: name: bswap_s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -85,14 +85,13 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; GFX7-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY2]](s32) + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], 
[[C]](s32) ; GFX7-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] ; GFX7-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX8-LABEL: name: bswap_s24 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -120,6 +119,7 @@ ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] ; GFX7-NEXT: $vgpr0 = COPY [[BSWAP]](s32) + ; ; GFX8-LABEL: name: bswap_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -157,8 +157,7 @@ ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -167,6 +166,7 @@ ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX7-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX8-LABEL: name: bswap_v2s16 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -221,6 +221,7 @@ ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; ; GFX8-LABEL: name: bswap_v3s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX8-NEXT: {{ $}} @@ -294,24 +295,22 @@ ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY2]](s32) ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) + ; GFX7-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[COPY3]](s32) ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; GFX7-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC2]] ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY4]](s32) ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY5]](s32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; GFX7-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; GFX7-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC4]] ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX7-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY6]](s32) ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX7-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY7]](s32) + ; GFX7-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY7]](s32) ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; GFX7-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC6]] ; GFX7-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -326,6 +325,7 @@ ; GFX7-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; GFX7-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX8-LABEL: name: bswap_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -355,6 +355,7 @@ ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: bswap_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -384,6 +385,7 @@ ; GFX7-NEXT: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] ; GFX7-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP]](s32), [[BSWAP1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; ; GFX8-LABEL: name: bswap_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -419,6 +421,7 @@ ; GFX7-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[BSWAP2]](s32), [[BSWAP3]](s32) ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX8-LABEL: name: bswap_v2s64 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -156,14 +156,13 @@ ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[LSHR]](s32) ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -214,8 +213,8 @@ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[CTLZ_ZERO_UNDEF]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ_ZERO_UNDEF %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -72,9 +72,7 @@ ; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTLZ %0 %2:_(s32) = G_ZEXT %1 @@ -174,15 +172,14 @@ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]] ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32) + ; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32) ; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]] ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -237,8 +234,8 @@ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UMIN]], [[UV]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 %2:_(s33) = G_CTLZ %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -14,8 +14,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(s8) = G_CTPOP %1 @@ -36,8 +35,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s9) = G_TRUNC %0 %2:_(s9) = G_CTPOP %1 @@ -108,9 +106,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[CTPOP]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[CTPOP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTPOP %0 %2:_(s32) = G_ZEXT %1 @@ -131,8 +127,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTPOP %1 @@ -197,13 +192,10 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) + ; CHECK-NEXT: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[LSHR]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -226,8 +218,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTPOP %1 @@ -267,12 +258,12 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) @@ -295,12 +286,12 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -72,9 +72,7 @@ ; CHECK-NEXT: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTTZ %0 %2:_(s32) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -18,12 +18,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; VI-LABEL: name: test_fabs_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; GFX9-LABEL: name: test_fabs_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -47,12 +49,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; VI-LABEL: name: test_fabs_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FABS]](s64) + ; ; GFX9-LABEL: name: test_fabs_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -77,6 +81,7 @@ ; SI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fabs_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -85,6 +90,7 @@ ; VI-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fabs_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -115,6 +121,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fabs_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -124,6 +131,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32) ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -154,6 +162,7 @@ ; SI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fabs_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -164,6 +173,7 @@ ; VI-NEXT: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FABS]](s32), [[FABS1]](s32), [[FABS2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fabs_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -194,6 +204,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fabs_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -203,6 +214,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FABS]](s64), [[FABS1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fabs_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -229,12 +241,14 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; VI-LABEL: name: test_fabs_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -268,19 +282,19 @@ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x 
s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -298,19 +312,19 @@ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]] ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) @@ -353,6 +367,7 @@ ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fabs_v4s16 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -362,6 +377,7 @@ ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fabs_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -18,6 +18,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; VI-LABEL: name: test_fadd_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -25,6 +26,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_fadd_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -51,6 +53,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; VI-LABEL: name: test_fadd_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -58,6 +61,7 @@ ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; ; GFX9-LABEL: name: test_fadd_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -90,6 +94,7 @@ ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fadd_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -100,6 +105,7 @@ ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fadd_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -137,6 +143,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -148,6 +155,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -182,6 +190,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fadd_v2s32_flags ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -193,6 +202,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v2s32_flags ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -228,6 +238,7 @@ ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fadd_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -240,6 +251,7 @@ ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[UV2]], [[UV5]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fadd_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -275,6 +287,7 @@ ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fadd_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; VI-NEXT: {{ $}} @@ -286,6 +299,7 @@ ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[UV3]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; 
GFX9-LABEL: name: test_fadd_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} @@ -337,6 +351,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; VI-LABEL: name: test_fadd_v2s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -359,6 +374,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -425,13 +441,13 @@ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; VI-LABEL: name: test_fadd_v3s16 ; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; VI-NEXT: {{ $}} @@ -471,13 +487,13 @@ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -583,6 +599,7 @@ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; VI-LABEL: name: test_fadd_v4s16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -623,6 +640,7 @@ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ 
$}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -25,6 +25,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -39,6 +40,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -79,6 +81,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -90,6 +93,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s32 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -124,6 +128,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s64 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -135,6 +140,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -172,6 +178,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; VI-LABEL: name: test_copysign_s64_s32 ; VI: liveins: $vgpr0_vgpr1, $vgpr2 ; VI-NEXT: {{ $}} @@ -186,6 +193,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; ; GFX9-LABEL: name: test_copysign_s64_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -226,6 +234,7 @@ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; VI-LABEL: name: test_copysign_s32_s64 ; VI: liveins: $vgpr0, $vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -240,6 +249,7 @@ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) + ; ; GFX9-LABEL: name: test_copysign_s32_s64 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -282,6 +292,7 @@ ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_copysign_s16_s32 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} @@ -298,6 +309,7 @@ ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_copysign_s16_s32 ; GFX9: liveins: 
$vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -343,6 +355,7 @@
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]]
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]]
; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; VI-LABEL: name: test_copysign_s32_s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
@@ -358,6 +371,7 @@
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]]
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND2]]
; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; GFX9-LABEL: name: test_copysign_s32_s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -394,14 +408,15 @@
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]]
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
+ ;
; VI-LABEL: name: test_copysign_s64_s16
; VI: liveins: $vgpr0_vgpr1, $vgpr2
; VI-NEXT: {{ $}}
@@ -410,14 +425,15 @@
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]]
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
+ ;
; GFX9-LABEL: name: test_copysign_s64_s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -426,8 +442,8 @@
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]]
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C2]]
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[C3]](s32)
@@ -463,6 +479,7 @@
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_copysign_s16_s64
; VI: liveins: $vgpr0, $vgpr1_vgpr2
; VI-NEXT: {{ $}}
@@ -479,6 +496,7 @@
; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_copysign_s16_s64
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -527,6 +545,7 @@
; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]]
; SI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]]
; SI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>)
+ ;
; VI-LABEL: name: test_copysign_v2s16_v2s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
@@ -545,6 +564,7 @@
; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY1]], [[BITCAST]]
; VI-NEXT: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]]
; VI-NEXT: $vgpr0 = COPY [[OR2]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_copysign_v2s16_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -583,6 +603,7 @@
; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]]
; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_copysign_v2s32_v2s32
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -596,6 +617,7 @@
; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[BUILD_VECTOR]]
; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_copysign_v2s32_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -638,6 +660,7 @@
; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_copysign_v2s64_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; VI-NEXT: {{ $}}
@@ -655,6 +678,7 @@
; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_copysign_v2s64_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; GFX9-NEXT: {{ $}}
@@ -707,6 +731,7 @@
; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_copysign_v2s64_v2s32
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -730,6 +755,7 @@
; VI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[AND3]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_copysign_v2s64_v2s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -787,6 +813,7 @@
; SI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
; SI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_copysign_v2s32_v2s64
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -808,6 +835,7 @@
; VI-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR]]
; VI-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_copysign_v2s32_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -852,6 +880,7 @@
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]]
; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; VI-LABEL: name: test_copysign_s32_s32_flagss
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
@@ -863,6 +892,7 @@
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND1]]
; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; GFX9-LABEL: name: test_copysign_s32_s32_flagss
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -901,6 +931,7 @@
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]]
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]]
; SI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; VI-LABEL: name: test_copysign_s32_s16_flags
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
@@ -916,6 +947,7 @@
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C]]
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = nnan G_OR [[AND]], [[AND2]]
; VI-NEXT: $vgpr0 = COPY [[OR]](s32)
+ ;
; GFX9-LABEL: name: test_copysign_s32_s16_flags
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -961,6 +993,7 @@
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]]
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_copysign_s16_s32_flags
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
@@ -977,6 +1010,7 @@
; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = nnan G_OR [[AND]], [[AND1]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_copysign_s16_s32_flags
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -19,6 +19,7 @@
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; SI-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; VI-LABEL: name: test_fma_s32
; VI: liveins: $vgpr0, $vgpr1, $vgpr2
; VI-NEXT: {{ $}}
@@ -27,6 +28,7 @@
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; VI-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX9-LABEL: name: test_fma_s32
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -55,6 +57,7 @@
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64)
+ ;
; VI-LABEL: name: test_fma_s64
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -63,6 +66,7 @@
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64)
+ ;
; GFX9-LABEL: name: test_fma_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -100,6 +104,7 @@
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_fma_s16
; VI: liveins: $vgpr0, $vgpr1, $vgpr2
; VI-NEXT: {{ $}}
@@ -112,6 +117,7 @@
; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_fma_s16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -155,6 +161,7 @@
; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_fma_v2s32
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; VI-NEXT: {{ $}}
@@ -168,6 +175,7 @@
; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fma_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
@@ -208,6 +216,7 @@
; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fma_v3s32
; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
; VI-NEXT: {{ $}}
@@ -222,6 +231,7 @@
; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fma_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
; GFX9-NEXT: {{ $}}
@@ -264,6 +274,7 @@
; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; VI-LABEL: name: test_fma_v4s32
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
; VI-NEXT: {{ $}}
@@ -279,6 +290,7 @@
; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX9-LABEL: name: test_fma_v4s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
; GFX9-NEXT: {{ $}}
@@ -320,6 +332,7 @@
; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_fma_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
; VI-NEXT: {{ $}}
@@ -333,6 +346,7 @@
; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_fma_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
; GFX9-NEXT: {{ $}}
@@ -394,6 +408,7 @@
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ;
; VI-LABEL: name: test_fma_v2s16
; VI: liveins: $vgpr0, $vgpr1, $vgpr2
; VI-NEXT: {{ $}}
@@ -421,6 +436,7 @@
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_fma_v2s16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
@@ -501,13 +517,13 @@
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ;
; VI-LABEL: name: test_fma_v3s16
; VI: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8
; VI-NEXT: {{ $}}
@@ -555,13 +571,13 @@
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
; VI-NEX