Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -336,6 +336,9 @@
   LegalizeResult lowerLoad(MachineInstr &MI);
   LegalizeResult lowerStore(MachineInstr &MI);
   LegalizeResult lowerBitCount(MachineInstr &MI);
+  LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
+  LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
+  LegalizeResult lowerFunnelShift(MachineInstr &MI);
   LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
   LegalizeResult lowerUITOFP(MachineInstr &MI);
Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1403,6 +1403,13 @@
     return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
   }
 
+  /// Build and insert \p Res = G_UREM \p Op0, \p Op1
+  MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1,
+                                Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags);
+  }
+
   MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0,
                                 const SrcOp &Src1,
                                 Optional<unsigned> Flags = None) {
Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -38,6 +38,7 @@
 class TargetRegisterInfo;
 class TargetRegisterClass;
 class ConstantFP;
+class ConstantInt;
 class APFloat;
 
 /// Try to constrain Reg to the specified register class. If this fails,
@@ -144,6 +145,8 @@
                          bool HandleFConstants = true);
 const ConstantFP* getConstantFPVRegVal(Register VReg,
                                        const MachineRegisterInfo &MRI);
+const ConstantInt *getConstantIntVRegVal(Register VReg,
+                                         const MachineRegisterInfo &MRI);
 
 /// See if Reg is defined by an single def instruction that is
 /// Opcode. Also try to do trivial folding if it's a COPY with
@@ -241,6 +244,14 @@
 bool isBuildVectorAllOnes(const MachineInstr &MI,
                           const MachineRegisterInfo &MRI);
 
+/// Attempt to match a unary predicate against a scalar/splat constant or every
+/// element of a constant G_BUILD_VECTOR/G_BUILD_VECTOR_TRUNC. If AllowUndefs
+/// is true, G_IMPLICIT_DEF elements are accepted without calling Match.
+bool matchUnaryPredicate(
+    const MachineRegisterInfo &MRI, Register Reg,
+    std::function<bool(const MachineRegisterInfo &, Register)> Match,
+    bool AllowUndefs);
+
 /// Returns true if given the TargetLowering's boolean contents information,
 /// the value \p Val contains a true value.
 bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3110,6 +3110,9 @@
     MI.eraseFromParent();
     return Legalized;
   }
+  case G_FSHL:
+  case G_FSHR:
+    return lowerFunnelShift(MI);
   }
 }
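The two new cases route G_FSHL and G_FSHR into the lowerFunnelShift helper added in the next hunk. For reference, this is the behaviour the lowering has to reproduce, per the LangRef definition of llvm.fshl/llvm.fshr: concatenate X (high half) and Y (low half), shift by the amount modulo the bit width, and keep the high (fshl) or low (fshr) half. A minimal standalone C++ sketch of that reference model follows (illustrative only, not code from this patch; the RefFSHL8/RefFSHR8 names are made up):

// Reference model of the generic funnel-shift opcodes on an 8-bit type,
// mirroring the LangRef wording. Not part of the patch.
#include <cstdint>

static uint8_t RefFSHL8(uint8_t X, uint8_t Y, uint8_t Z) {
  unsigned S = Z % 8;                        // amount is taken modulo BW
  uint16_t Concat = (uint16_t(X) << 8) | Y;  // X:Y as a 16-bit value
  return uint8_t((Concat << S) >> 8);        // fshl keeps the high 8 bits
}

static uint8_t RefFSHR8(uint8_t X, uint8_t Y, uint8_t Z) {
  unsigned S = Z % 8;
  uint16_t Concat = (uint16_t(X) << 8) | Y;
  return uint8_t(Concat >> S);               // fshr keeps the low 8 bits
}

Note that fshl(X, Y, 0) == X while fshr(X, Y, 0) == Y, so the two opcodes are only mirror images of one another for amounts that are non-zero modulo the bit width; that corner case is exactly what the isNonZeroModBitWidthOrUndef checks in the next hunk guard against.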
@@ -4962,6 +4965,130 @@
   }
 }
 
+// Check that (every element of) Reg is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
+                                        Register Reg, unsigned BW) {
+  return matchUnaryPredicate(
+      MRI, Reg,
+      [=](const MachineRegisterInfo &MRI, Register R) {
+        const ConstantInt *C = getConstantIntVRegVal(R, MRI);
+        return !C || C->getValue().urem(BW) != 0;
+      },
+      true);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register X = MI.getOperand(1).getReg();
+  Register Y = MI.getOperand(2).getReg();
+  Register Z = MI.getOperand(3).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(Z);
+
+  unsigned BW = Ty.getScalarSizeInBits();
+
+  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+
+  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+    // fshl X, Y, Z -> fshr X, Y, -Z
+    // fshr X, Y, Z -> fshl X, Y, -Z
+    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
+    Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
+  } else {
+    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
+    auto One = MIRBuilder.buildConstant(ShTy, 1);
+    if (IsFSHL) {
+      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
+    } else {
+      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
+      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
+    }
+
+    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
+  }
+
+  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
+  MI.eraseFromParent();
+  return Legalized;
+}
+
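The rewrite above can be sanity-checked directly on a small type. The standalone sketch below (illustrative only, not part of the patch) exhaustively verifies fshl X, Y, Z == fshr (srl X, 1), (fshr X, Y, 1), ~Z on an 8-bit type; the effective amount ~Z % 8 == 7 - (Z % 8) is always in range, which is why this form needs no guard against a zero amount:

// Exhaustive check of the inverse-opcode rewrite over all 8-bit amounts,
// using the concat-and-shift reference model. Not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t X = 0xB4, Y = 0x2D;                // arbitrary sample operands
  const uint16_t Concat = (uint16_t(X) << 8) | Y;  // X:Y reference pair
  for (unsigned Z = 0; Z < 256; ++Z) {
    unsigned S = Z % 8;
    uint8_t Fshl = uint8_t((Concat << S) >> 8);    // reference fshl X, Y, Z
    uint8_t A = X >> 1;                            // srl X, 1
    uint8_t B = uint8_t(Concat >> 1);              // fshr X, Y, 1
    uint16_t ConcatAB = (uint16_t(A) << 8) | B;    // equals (X:Y) >> 1
    uint8_t Rewritten = uint8_t(ConcatAB >> (~Z % 8)); // fshr A, B, ~Z
    assert(Rewritten == Fshl);
  }
  return 0;
}

The fshr case, fshl (fshl X, Y, 1), (shl Y, 1), ~Z, checks out the same way with the constant pre-shift applied to the other operand.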
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register X = MI.getOperand(1).getReg();
+  Register Y = MI.getOperand(2).getReg();
+  Register Z = MI.getOperand(3).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(Z);
+
+  const unsigned BW = Ty.getScalarSizeInBits();
+
+  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+
+  Register ShX, ShY;
+  Register ShAmt, InvShAmt;
+
+  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
+    // fshl: X << C | Y >> (BW - C)
+    // fshr: X << (BW - C) | Y >> C
+    // where C = Z % BW is not zero
+    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
+    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
+    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
+  } else {
+    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
+    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
+    if (isPowerOf2_32(BW)) {
+      // Z % BW -> Z & (BW - 1)
+      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
+      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
+      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
+    } else {
+      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
+      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
+      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
+    }
+
+    auto One = MIRBuilder.buildConstant(ShTy, 1);
+    if (IsFSHL) {
+      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
+      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
+      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
+    } else {
+      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
+      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
+      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
+    }
+  }
+
+  MIRBuilder.buildOr(Dst, ShX, ShY);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
+  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
+
+  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
+  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
+    return lowerFunnelShiftAsShifts(MI);
+  return lowerFunnelShiftWithInverse(MI);
+}
+
 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
 // representation.
 LegalizerHelper::LegalizeResult
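A companion check for the shift-only expansion used by lowerFunnelShiftAsShifts when the amount may be zero modulo a power-of-two bit width (again an illustrative standalone sketch, not patch code). For fshr it emits (X << 1 << (~Z & (BW - 1))) | (Y >> (Z & (BW - 1))), so every variable shift amount stays strictly below BW:

// Exhaustive check of the fshr shift-only expansion over all 8-bit amounts.
// Not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned BW = 8;
  const uint8_t X = 0xB4, Y = 0x2D;                // arbitrary sample operands
  const uint16_t Concat = (uint16_t(X) << 8) | Y;  // X:Y reference pair
  for (unsigned Z = 0; Z < 256; ++Z) {
    unsigned ShAmt = Z & (BW - 1);                 // Z % BW
    unsigned InvShAmt = ~Z & (BW - 1);             // (BW - 1) - (Z % BW)
    // The extra constant shift by 1 is what keeps InvShAmt <= BW - 1 usable:
    // no single shift is ever by the full bit width.
    uint8_t ShX = uint8_t(uint8_t(X << 1) << InvShAmt);
    uint8_t ShY = Y >> ShAmt;
    uint8_t Expected = uint8_t(Concat >> ShAmt);   // reference fshr X, Y, Z
    assert(uint8_t(ShX | ShY) == Expected);
  }
  return 0;
}

The non-power-of-two path computes the same two amounts with G_UREM and G_SUB instead of masking, one of the places the new MachineIRBuilder::buildURem helper is used.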
Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -351,6 +351,14 @@
   return MI->getOperand(1).getFPImm();
 }
 
+const ConstantInt *llvm::getConstantIntVRegVal(Register VReg,
+                                               const MachineRegisterInfo &MRI) {
+  MachineInstr *MI = MRI.getVRegDef(VReg);
+  if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return nullptr;
+  return MI->getOperand(1).getCImm();
+}
+
 namespace {
 struct DefinitionAndSourceRegister {
   MachineInstr *MI;
@@ -730,6 +738,38 @@
   return isBuildVectorConstantSplat(MI, MRI, -1);
 }
 
+bool llvm::matchUnaryPredicate(
+    const MachineRegisterInfo &MRI, Register Reg,
+    std::function<bool(const MachineRegisterInfo &, Register)> Match,
+    bool AllowUndefs) {
+
+  if (Match(MRI, Reg))
+    return true;
+
+  const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (AllowUndefs && Def->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+    return true;
+
+  if (Def->getOpcode() == TargetOpcode::G_BUILD_VECTOR ||
+      Def->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC) {
+    for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+      Register SrcElt = Def->getOperand(I).getReg();
+      if (Match(MRI, SrcElt))
+        continue;
+
+      if (AllowUndefs) {
+        const MachineInstr *SrcDef = getDefIgnoringCopies(SrcElt, MRI);
+        if (SrcDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+          continue;
+      }
+
+      return false;
+    }
+  }
+
+  return true;
+}
+
 bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
                           bool IsFP) {
   switch (TLI.getBooleanContents(IsVector, IsFP)) {
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1594,8 +1594,11 @@
     .clampScalar(0, S32, S64)
     .lower();
 
+  // TODO: Only try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR) .legalFor({{S32, S32}}) + .lowerFor({{V2S16, V2S16}}) + .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)) .scalarize(0) .lower(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -0,0 +1,7594 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GFX6,GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GFX8,GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9,GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10,GCN %s + +define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { +; GFX6-LABEL: s_fshr_i7: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_sub_i32 s3, 0, 7 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX6-NEXT: s_movk_i32 s3, 0x7f +; GFX6-NEXT: s_and_b32 s2, s2, s3 +; GFX6-NEXT: s_and_b32 s1, s1, s3 +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 +; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i7: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: s_sub_i32 s3, 0, 7 +; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX8-NEXT: s_movk_i32 s3, 0x7f +; GFX8-NEXT: s_and_b32 s2, s2, s3 +; GFX8-NEXT: s_and_b32 s1, s1, s3 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 +; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i7: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 
v0, 7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_sub_i32 s3, 0, 7 +; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX9-NEXT: s_movk_i32 s3, 0x7f +; GFX9-NEXT: s_and_b32 s2, s2, s3 +; GFX9-NEXT: s_and_b32 s1, s1, s3 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 +; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 +; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 +; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1 +; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i7: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 +; GFX10-NEXT: s_sub_i32 s3, 0, 7 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX10-NEXT: s_movk_i32 s3, 0x7f +; GFX10-NEXT: s_and_b32 s2, s2, s3 +; GFX10-NEXT: s_and_b32 s1, s1, s3 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX10-NEXT: v_sub_nc_u16_e64 v1, 6, v0 +; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX10-NEXT: v_lshrrev_b16_e64 v0, v0, s1 +; GFX10-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) + ret i7 %result +} + +define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) { +; GFX6-LABEL: v_fshr_i7: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: s_sub_i32 s4, 0, 7 +; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: v_mov_b32_e32 v4, 0x7f +; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 
7, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX8-NEXT: s_sub_i32 s4, 0, 7 +; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX8-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 +; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX8-NEXT: v_mov_b32_e32 v4, 0x7f +; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX9-NEXT: s_sub_i32 s4, 0, 7 +; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7f +; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 +; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i7: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 +; GFX10-NEXT: s_sub_i32 s4, 0, 7 +; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX10-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 +; GFX10-NEXT: v_sub_nc_u32_e32 
v2, v2, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f +; GFX10-NEXT: v_sub_nc_u16_e64 v4, 6, v2 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 +; GFX10-NEXT: v_and_b32_e32 v7, v4, v3 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, v7, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) + ret i7 %result +} + +define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { +; GFX6-LABEL: s_fshr_i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b32 s3, s2, 7 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: s_andn2_b32 s2, 7, s2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: s_lshr_b32 s1, s1, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s3, s2, 7 +; GFX8-NEXT: s_andn2_b32 s2, 7, s2 +; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s3, s2, 7 +; GFX9-NEXT: s_andn2_b32 s2, 7, s2 +; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX9-NEXT: s_and_b32 s1, s1, 0xff +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s3, s2, 7 +; GFX10-NEXT: s_andn2_b32 s2, 7, s2 +; GFX10-NEXT: s_and_b32 s1, s1, 0xff +; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s4 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) + ret i8 %result +} + +define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) { +; GFX6-LABEL: v_fshr_i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v3, 7, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: 
v_lshlrev_b16_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v3, 7, v2 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 +; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, v3, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) + ret i8 %result +} + +define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) { +; GFX6-LABEL: s_fshr_i8_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: s_lshl_b32 s0, s0, 3 +; GFX6-NEXT: s_lshr_b32 s1, s1, 4 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i8_4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s2, 3, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i8_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX9-NEXT: s_and_b32 s1, s1, 0xff +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s2, 3, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i8_4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s1, s1, 0xff +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, 3, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) + ret i8 %result +} + +define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) { +; GFX6-LABEL: v_fshr_i8_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 4, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i8_4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 4 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i8_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 4 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i8_4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 3, v0 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, 4, v1 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) + ret i8 %result +} + +define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) { +; GFX6-LABEL: s_fshr_i8_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: s_lshl_b32 s0, s0, 2 +; GFX6-NEXT: s_lshr_b32 s1, s1, 5 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i8_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX8-NEXT: s_and_b32 s1, s1, 0xff +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s2, 2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i8_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX9-NEXT: s_and_b32 s1, s1, 0xff +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s2, 2, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i8_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s1, s1, 0xff +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, 2, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) + ret i8 %result +} + +define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { +; GFX6-LABEL: v_fshr_i8_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 5, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i8_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; 
GFX8-NEXT: v_mov_b32_e32 v2, 5 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 2, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i8_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 5 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 2, v0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i8_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 2, v0 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, 5, v1 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) + ret i8 %result +} + +define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { +; GFX6-LABEL: s_fshr_v2i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s7, 0xff +; GFX6-NEXT: s_lshr_b32 s3, s0, 8 +; GFX6-NEXT: s_lshr_b32 s4, s1, 8 +; GFX6-NEXT: s_lshr_b32 s5, s2, 8 +; GFX6-NEXT: s_and_b32 s6, s2, 7 +; GFX6-NEXT: s_and_b32 s1, s1, s7 +; GFX6-NEXT: s_andn2_b32 s2, 7, s2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: s_lshr_b32 s1, s1, s6 +; GFX6-NEXT: s_andn2_b32 s2, 7, s5 +; GFX6-NEXT: s_lshl_b32 s3, s3, 1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshl_b32 s2, s3, s2 +; GFX6-NEXT: s_and_b32 s1, s5, 7 +; GFX6-NEXT: s_and_b32 s3, s4, s7 +; GFX6-NEXT: s_lshr_b32 s1, s3, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_and_b32 s1, s1, s7 +; GFX6-NEXT: s_and_b32 s0, s0, s7 +; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v2i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s6, s2, 7 +; GFX8-NEXT: s_lshr_b32 s5, s2, 8 +; GFX8-NEXT: s_andn2_b32 s2, 7, s2 +; GFX8-NEXT: s_bfe_u32 s7, 1, 0x100000 +; GFX8-NEXT: s_lshr_b32 s3, s0, 8 +; GFX8-NEXT: s_lshl_b32 s0, s0, s7 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_movk_i32 s2, 0xff +; GFX8-NEXT: s_lshr_b32 s4, s1, 8 +; GFX8-NEXT: s_and_b32 s1, s1, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s6 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s5, 7 +; GFX8-NEXT: s_andn2_b32 s5, 7, s5 +; GFX8-NEXT: s_and_b32 s4, s4, s2 +; GFX8-NEXT: s_lshl_b32 s3, s3, s7 +; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s3, s3, s5 +; GFX8-NEXT: s_lshr_b32 s1, s4, s1 +; GFX8-NEXT: s_or_b32 s1, s3, s1 +; GFX8-NEXT: s_and_b32 s0, s0, s2 +; GFX8-NEXT: s_and_b32 s1, s1, s2 +; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 +; GFX8-NEXT: s_lshl_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v2i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s6, s2, 7 +; GFX9-NEXT: s_lshr_b32 s5, s2, 8 +; GFX9-NEXT: s_andn2_b32 s2, 7, s2 +; GFX9-NEXT: s_bfe_u32 s7, 1, 0x100000 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 
+; GFX9-NEXT: s_lshl_b32 s0, s0, s7 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_movk_i32 s2, 0xff +; GFX9-NEXT: s_lshr_b32 s4, s1, 8 +; GFX9-NEXT: s_and_b32 s1, s1, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s6 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s5, 7 +; GFX9-NEXT: s_andn2_b32 s5, 7, s5 +; GFX9-NEXT: s_and_b32 s4, s4, s2 +; GFX9-NEXT: s_lshl_b32 s3, s3, s7 +; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 +; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s3, s3, s5 +; GFX9-NEXT: s_lshr_b32 s1, s4, s1 +; GFX9-NEXT: s_or_b32 s1, s3, s1 +; GFX9-NEXT: s_and_b32 s0, s0, s2 +; GFX9-NEXT: s_and_b32 s1, s1, s2 +; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000 +; GFX9-NEXT: s_lshl_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v2i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s5, s2, 8 +; GFX10-NEXT: s_and_b32 s6, s2, 7 +; GFX10-NEXT: s_andn2_b32 s2, 7, s2 +; GFX10-NEXT: s_bfe_u32 s7, 1, 0x100000 +; GFX10-NEXT: s_lshr_b32 s3, s0, 8 +; GFX10-NEXT: s_lshr_b32 s4, s1, 8 +; GFX10-NEXT: s_movk_i32 s8, 0xff +; GFX10-NEXT: s_lshl_b32 s0, s0, s7 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_and_b32 s4, s4, s8 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, s6, 0x100000 +; GFX10-NEXT: s_and_b32 s6, s5, 7 +; GFX10-NEXT: s_andn2_b32 s5, 7, s5 +; GFX10-NEXT: s_and_b32 s1, s1, s8 +; GFX10-NEXT: s_lshl_b32 s3, s3, s7 +; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 +; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s3, s3, s5 +; GFX10-NEXT: s_lshr_b32 s4, s4, s6 +; GFX10-NEXT: s_lshr_b32 s1, s1, s2 +; GFX10-NEXT: s_or_b32 s2, s3, s4 +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: s_and_b32 s1, s2, s8 +; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 +; GFX10-NEXT: s_and_b32 s0, s0, s8 +; GFX10-NEXT: s_lshl_b32 s1, s1, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %lhs = bitcast i16 %lhs.arg to <2 x i8> + %rhs = bitcast i16 %rhs.arg to <2 x i8> + %amt = bitcast i16 %amt.arg to <2 x i8> + %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) + %cast.result = bitcast <2 x i8> %result to i16 + ret i16 %cast.result +} + +define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { +; GFX6-LABEL: v_fshr_v2i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v2 +; GFX6-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: s_movk_i32 s4, 0xff +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v6, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_and_b32_e32 v1, 7, v5 +; GFX6-NEXT: v_and_b32_e32 v3, s4, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v3 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_and_b32_e32 
v0, 0xff, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 +; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_and_b32_e32 v1, 7, v5 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 +; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 +; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: v_and_b32_e32 v1, 7, v5 +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 +; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 +; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: v_and_b32_e32 v7, 7, v2 +; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3 +; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 +; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX10-NEXT: v_lshlrev_b16_e64 v4, 1, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX10-NEXT: v_lshrrev_b16_e64 v3, v3, v5 +; GFX10-NEXT: v_lshlrev_b16_e64 v4, v6, v4 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, v7, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 +; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; 
GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX10-NEXT: s_setpc_b64 s[30:31] + %lhs = bitcast i16 %lhs.arg to <2 x i8> + %rhs = bitcast i16 %rhs.arg to <2 x i8> + %amt = bitcast i16 %amt.arg to <2 x i8> + %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) + %cast.result = bitcast <2 x i8> %result to i16 + ret i16 %cast.result +} + +define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { +; GFX6-LABEL: s_fshr_v4i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s13, 0xff +; GFX6-NEXT: s_lshr_b32 s3, s0, 8 +; GFX6-NEXT: s_lshr_b32 s4, s0, 16 +; GFX6-NEXT: s_lshr_b32 s5, s0, 24 +; GFX6-NEXT: s_lshr_b32 s6, s1, 8 +; GFX6-NEXT: s_lshr_b32 s7, s1, 16 +; GFX6-NEXT: s_lshr_b32 s8, s1, 24 +; GFX6-NEXT: s_lshr_b32 s9, s2, 8 +; GFX6-NEXT: s_lshr_b32 s10, s2, 16 +; GFX6-NEXT: s_lshr_b32 s11, s2, 24 +; GFX6-NEXT: s_and_b32 s12, s2, 7 +; GFX6-NEXT: s_and_b32 s1, s1, s13 +; GFX6-NEXT: s_andn2_b32 s2, 7, s2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: s_lshr_b32 s1, s1, s12 +; GFX6-NEXT: s_andn2_b32 s2, 7, s9 +; GFX6-NEXT: s_lshl_b32 s3, s3, 1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshl_b32 s2, s3, s2 +; GFX6-NEXT: s_and_b32 s1, s9, 7 +; GFX6-NEXT: s_and_b32 s3, s6, s13 +; GFX6-NEXT: s_lshr_b32 s1, s3, s1 +; GFX6-NEXT: s_andn2_b32 s3, 7, s10 +; GFX6-NEXT: s_lshl_b32 s4, s4, 1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_lshl_b32 s3, s4, s3 +; GFX6-NEXT: s_and_b32 s2, s10, 7 +; GFX6-NEXT: s_and_b32 s4, s7, s13 +; GFX6-NEXT: s_lshr_b32 s2, s4, s2 +; GFX6-NEXT: s_and_b32 s1, s1, s13 +; GFX6-NEXT: s_or_b32 s2, s3, s2 +; GFX6-NEXT: s_and_b32 s3, s11, 7 +; GFX6-NEXT: s_andn2_b32 s4, 7, s11 +; GFX6-NEXT: s_lshl_b32 s5, s5, 1 +; GFX6-NEXT: s_and_b32 s0, s0, s13 +; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s1, s2, s13 +; GFX6-NEXT: s_lshl_b32 s4, s5, s4 +; GFX6-NEXT: s_lshr_b32 s3, s8, s3 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s1, s3, s13 +; GFX6-NEXT: s_lshl_b32 s1, s1, 24 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v4i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_lshr_b32 s9, s2, 8 +; GFX8-NEXT: s_lshr_b32 s10, s2, 16 +; GFX8-NEXT: s_lshr_b32 s11, s2, 24 +; GFX8-NEXT: s_and_b32 s12, s2, 7 +; GFX8-NEXT: s_andn2_b32 s2, 7, s2 +; GFX8-NEXT: s_movk_i32 s13, 0xff +; GFX8-NEXT: s_bfe_u32 s14, 1, 0x100000 +; GFX8-NEXT: s_lshr_b32 s3, s0, 8 +; GFX8-NEXT: s_lshr_b32 s4, s0, 16 +; GFX8-NEXT: s_lshr_b32 s5, s0, 24 +; GFX8-NEXT: s_lshr_b32 s6, s1, 8 +; GFX8-NEXT: s_lshr_b32 s7, s1, 16 +; GFX8-NEXT: s_lshr_b32 s8, s1, 24 +; GFX8-NEXT: s_and_b32 s1, s1, s13 +; GFX8-NEXT: s_lshl_b32 s0, s0, s14 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s12, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_andn2_b32 s2, 7, s9 +; GFX8-NEXT: s_lshl_b32 s3, s3, s14 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s9, 7 +; GFX8-NEXT: s_lshl_b32 s2, s3, s2 +; GFX8-NEXT: s_and_b32 s3, s6, s13 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s3, s1 +; GFX8-NEXT: s_andn2_b32 s3, 7, s10 +; GFX8-NEXT: s_lshl_b32 s4, s4, s14 +; GFX8-NEXT: s_bfe_u32 s3, s3, 
0x100000 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_and_b32 s2, s10, 7 +; GFX8-NEXT: s_lshl_b32 s3, s4, s3 +; GFX8-NEXT: s_and_b32 s4, s7, s13 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshr_b32 s2, s4, s2 +; GFX8-NEXT: s_andn2_b32 s4, 7, s11 +; GFX8-NEXT: s_and_b32 s1, s1, s13 +; GFX8-NEXT: s_or_b32 s2, s3, s2 +; GFX8-NEXT: s_and_b32 s3, s11, 7 +; GFX8-NEXT: s_lshl_b32 s5, s5, s14 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s5, s4 +; GFX8-NEXT: s_bfe_u32 s5, s8, 0x100000 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_and_b32 s0, s0, s13 +; GFX8-NEXT: s_lshl_b32 s1, s1, 8 +; GFX8-NEXT: s_lshr_b32 s3, s5, s3 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s2, s13 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s3, s4, s3 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s3, s13 +; GFX8-NEXT: s_lshl_b32 s1, s1, 24 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v4i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_lshr_b32 s9, s2, 8 +; GFX9-NEXT: s_lshr_b32 s10, s2, 16 +; GFX9-NEXT: s_lshr_b32 s11, s2, 24 +; GFX9-NEXT: s_and_b32 s12, s2, 7 +; GFX9-NEXT: s_andn2_b32 s2, 7, s2 +; GFX9-NEXT: s_movk_i32 s13, 0xff +; GFX9-NEXT: s_bfe_u32 s14, 1, 0x100000 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 +; GFX9-NEXT: s_lshr_b32 s4, s0, 16 +; GFX9-NEXT: s_lshr_b32 s5, s0, 24 +; GFX9-NEXT: s_lshr_b32 s6, s1, 8 +; GFX9-NEXT: s_lshr_b32 s7, s1, 16 +; GFX9-NEXT: s_lshr_b32 s8, s1, 24 +; GFX9-NEXT: s_and_b32 s1, s1, s13 +; GFX9-NEXT: s_lshl_b32 s0, s0, s14 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, s12, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_andn2_b32 s2, 7, s9 +; GFX9-NEXT: s_lshl_b32 s3, s3, s14 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s9, 7 +; GFX9-NEXT: s_lshl_b32 s2, s3, s2 +; GFX9-NEXT: s_and_b32 s3, s6, s13 +; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s3, s1 +; GFX9-NEXT: s_andn2_b32 s3, 7, s10 +; GFX9-NEXT: s_lshl_b32 s4, s4, s14 +; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX9-NEXT: s_or_b32 s1, s2, s1 +; GFX9-NEXT: s_and_b32 s2, s10, 7 +; GFX9-NEXT: s_lshl_b32 s3, s4, s3 +; GFX9-NEXT: s_and_b32 s4, s7, s13 +; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_lshr_b32 s2, s4, s2 +; GFX9-NEXT: s_andn2_b32 s4, 7, s11 +; GFX9-NEXT: s_and_b32 s1, s1, s13 +; GFX9-NEXT: s_or_b32 s2, s3, s2 +; GFX9-NEXT: s_and_b32 s3, s11, 7 +; GFX9-NEXT: s_lshl_b32 s5, s5, s14 +; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX9-NEXT: s_lshl_b32 s4, s5, s4 +; GFX9-NEXT: s_bfe_u32 s5, s8, 0x100000 +; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX9-NEXT: s_and_b32 s0, s0, s13 +; GFX9-NEXT: s_lshl_b32 s1, s1, 8 +; GFX9-NEXT: s_lshr_b32 s3, s5, s3 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s2, s13 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s3, s4, s3 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s3, s13 +; GFX9-NEXT: s_lshl_b32 s1, s1, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v4i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s12, s2, 7 +; GFX10-NEXT: s_lshr_b32 s9, s2, 8 +; GFX10-NEXT: s_lshr_b32 s10, s2, 16 +; GFX10-NEXT: s_lshr_b32 s11, s2, 24 +; 
GFX10-NEXT: s_andn2_b32 s2, 7, s2 +; GFX10-NEXT: s_bfe_u32 s13, 1, 0x100000 +; GFX10-NEXT: s_lshr_b32 s3, s0, 8 +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_lshr_b32 s5, s0, 24 +; GFX10-NEXT: s_lshr_b32 s6, s1, 8 +; GFX10-NEXT: s_movk_i32 s14, 0xff +; GFX10-NEXT: s_lshl_b32 s0, s0, s13 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_and_b32 s6, s6, s14 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_and_b32 s2, s9, 7 +; GFX10-NEXT: s_andn2_b32 s9, 7, s9 +; GFX10-NEXT: s_lshr_b32 s7, s1, 16 +; GFX10-NEXT: s_lshr_b32 s8, s1, 24 +; GFX10-NEXT: s_and_b32 s1, s1, s14 +; GFX10-NEXT: s_bfe_u32 s12, s12, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s3, s3, s13 +; GFX10-NEXT: s_bfe_u32 s9, s9, 0x100000 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_lshr_b32 s1, s1, s12 +; GFX10-NEXT: s_lshr_b32 s2, s6, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, s9 +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: s_or_b32 s1, s3, s2 +; GFX10-NEXT: s_and_b32 s6, s7, s14 +; GFX10-NEXT: s_and_b32 s2, s10, 7 +; GFX10-NEXT: s_andn2_b32 s3, 7, s10 +; GFX10-NEXT: s_lshl_b32 s4, s4, s13 +; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_lshl_b32 s3, s4, s3 +; GFX10-NEXT: s_lshr_b32 s2, s6, s2 +; GFX10-NEXT: s_and_b32 s4, s11, 7 +; GFX10-NEXT: s_andn2_b32 s6, 7, s11 +; GFX10-NEXT: s_lshl_b32 s5, s5, s13 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_bfe_u32 s7, s8, 0x100000 +; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX10-NEXT: s_or_b32 s2, s3, s2 +; GFX10-NEXT: s_and_b32 s1, s1, s14 +; GFX10-NEXT: s_lshl_b32 s5, s5, s6 +; GFX10-NEXT: s_lshr_b32 s4, s7, s4 +; GFX10-NEXT: s_and_b32 s0, s0, s14 +; GFX10-NEXT: s_lshl_b32 s1, s1, 8 +; GFX10-NEXT: s_or_b32 s3, s5, s4 +; GFX10-NEXT: s_and_b32 s2, s2, s14 +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: s_lshl_b32 s1, s2, 16 +; GFX10-NEXT: s_and_b32 s2, s3, s14 +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: s_lshl_b32 s1, s2, 24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %lhs = bitcast i32 %lhs.arg to <4 x i8> + %rhs = bitcast i32 %rhs.arg to <4 x i8> + %amt = bitcast i32 %amt.arg to <4 x i8> + %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) + %cast.result = bitcast <4 x i8> %result to i32 + ret i32 %cast.result +} + +define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { +; GFX6-LABEL: v_fshr_v4i8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshrrev_b32_e32 v9, 8, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v11, 24, v2 +; GFX6-NEXT: v_and_b32_e32 v12, 7, v2 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: s_movk_i32 s4, 0xff +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v12, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 7, v9 +; GFX6-NEXT: v_xor_b32_e32 v9, -1, v9 +; GFX6-NEXT: v_and_b32_e32 v6, s4, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 
v1, v1, v6 +; GFX6-NEXT: v_xor_b32_e32 v6, -1, v10 +; GFX6-NEXT: v_and_b32_e32 v9, 7, v9 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xff +; GFX6-NEXT: v_lshlrev_b32_e32 v3, v9, v3 +; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 +; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v6, v4 +; GFX6-NEXT: v_and_b32_e32 v3, 7, v10 +; GFX6-NEXT: v_and_b32_e32 v6, v7, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6 +; GFX6-NEXT: v_xor_b32_e32 v6, -1, v11 +; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX6-NEXT: v_and_b32_e32 v4, 7, v11 +; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, v3, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v5, v6, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, v4, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v4i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9 +; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 +; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3 +; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 +; GFX8-NEXT: v_mov_b32_e32 v6, 1 +; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 +; GFX8-NEXT: v_mov_b32_e32 v9, 0xff +; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8 +; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 +; GFX8-NEXT: v_or_b32_e32 v4, v5, v4 +; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v2, s4, v4 +; GFX8-NEXT: 
v_and_b32_e32 v0, s4, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v4i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9 +; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX9-NEXT: v_or_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 +; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 +; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3 +; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX9-NEXT: v_and_b32_e32 v4, 7, v6 +; GFX9-NEXT: v_mov_b32_e32 v6, 1 +; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xff +; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8 +; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8 +; GFX9-NEXT: v_or_b32_e32 v4, v5, v4 +; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 +; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 +; GFX9-NEXT: v_and_b32_e32 v2, s4, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v4i8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX10-NEXT: v_xor_b32_e32 v11, -1, v6 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 +; GFX10-NEXT: v_and_b32_e32 v15, 7, v8 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v14, 7, v11 +; GFX10-NEXT: v_lshlrev_b16_e64 v3, 1, v3 +; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, v15, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, 0xff +; GFX10-NEXT: v_lshlrev_b16_e64 v3, v14, v3 +; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12 
+; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX10-NEXT: v_and_b32_e32 v8, s4, v1 +; GFX10-NEXT: v_and_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 +; GFX10-NEXT: v_lshlrev_b16_e64 v4, 1, v4 +; GFX10-NEXT: v_and_b32_e32 v15, 7, v14 +; GFX10-NEXT: v_lshlrev_b16_e64 v5, 1, v5 +; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 +; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX10-NEXT: v_lshrrev_b16_e64 v6, v6, v7 +; GFX10-NEXT: v_lshlrev_b16_e64 v4, v11, v4 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, v10, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v5, v15, v5 +; GFX10-NEXT: v_lshrrev_b16_e64 v7, v12, v9 +; GFX10-NEXT: v_lshrrev_b16_e64 v2, v2, v8 +; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 +; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX10-NEXT: v_mov_b32_e32 v6, 8 +; GFX10-NEXT: v_or_b32_e32 v4, v5, v7 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_and_b32_e32 v3, s4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %lhs = bitcast i32 %lhs.arg to <4 x i8> + %rhs = bitcast i32 %rhs.arg to <4 x i8> + %amt = bitcast i32 %amt.arg to <4 x i8> + %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) + %cast.result = bitcast <4 x i8> %result to i32 + ret i32 %cast.result +} + +define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { +; GFX6-LABEL: s_fshr_i24: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_sub_i32 s3, 0, 24 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX6-NEXT: s_mov_b32 s3, 0xffffff +; GFX6-NEXT: s_and_b32 s2, s2, s3 +; GFX6-NEXT: s_and_b32 s1, s1, s3 +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 +; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i24: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: s_sub_i32 s3, 0, 24 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX8-NEXT: s_mov_b32 s3, 0xffffff +; GFX8-NEXT: s_and_b32 s2, s2, s3 +; GFX8-NEXT: s_and_b32 s1, s1, s3 +; GFX8-NEXT: 
v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 +; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 +; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i24: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_sub_i32 s3, 0, 24 +; GFX9-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX9-NEXT: s_mov_b32 s3, 0xffffff +; GFX9-NEXT: s_and_b32 s2, s2, s3 +; GFX9-NEXT: s_and_b32 s1, s1, s3 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 +; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 +; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 +; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i24: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX10-NEXT: s_sub_i32 s3, 0, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s3, v0 +; GFX10-NEXT: s_mov_b32 s3, 0xffffff +; GFX10-NEXT: s_and_b32 s2, s2, s3 +; GFX10-NEXT: s_and_b32 s1, s1, s3 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1 +; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) + ret i24 %result +} + +define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { +; GFX6-LABEL: v_fshr_i24: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 +; GFX6-NEXT: 
v_rcp_iflag_f32_e32 v3, v3 +; GFX6-NEXT: s_sub_i32 s4, 0, 24 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX6-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff +; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i24: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX8-NEXT: s_sub_i32 s4, 0, 24 +; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX8-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 +; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff +; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i24: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX9-NEXT: s_sub_i32 s4, 0, 24 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 +; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1 +; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i24: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 +; GFX10-NEXT: s_sub_i32 s4, 0, 24 +; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX10-NEXT: v_mul_lo_u32 v4, s4, v3 +; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffffff +; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v4 +; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX10-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) + ret i24 %result +} + +define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { +; GFX6-LABEL: s_fshr_v2i24: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_sub_i32 s6, 0, 24 +; GFX6-NEXT: s_mov_b32 s7, 0xffffff +; GFX6-NEXT: s_and_b32 s4, s4, s7 +; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_mul_lo_u32 v2, s6, v1 +; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GFX6-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX6-NEXT: v_mul_lo_u32 v2, s6, v0 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: s_and_b32 s4, s5, s7 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v1 +; GFX6-NEXT: v_and_b32_e32 v3, s7, v3 +; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v3 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX6-NEXT: v_and_b32_e32 v1, s7, v1 +; GFX6-NEXT: s_and_b32 s0, s2, s7 +; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 +; GFX6-NEXT: s_lshl_b32 s0, s1, 1 +; GFX6-NEXT: v_and_b32_e32 v2, s7, v2 +; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_and_b32_e32 v0, s7, v0 +; 
GFX6-NEXT: s_and_b32 s0, s3, s7 +; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v1 +; GFX6-NEXT: s_movk_i32 s0, 0xff +; GFX6-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_and_b32_e32 v2, s0, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX6-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s0, v5 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s0, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v2i24: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: s_sub_i32 s6, 0, 24 +; GFX8-NEXT: s_mov_b32 s7, 0xffffff +; GFX8-NEXT: s_and_b32 s4, s4, s7 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: v_mul_lo_u32 v2, s6, v1 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, s6, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1 +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: s_and_b32 s4, s5, s7 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v1 +; GFX8-NEXT: v_and_b32_e32 v3, s7, v3 +; GFX8-NEXT: v_lshlrev_b32_e64 v2, v3, s0 +; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX8-NEXT: v_and_b32_e32 v1, s7, v1 +; GFX8-NEXT: s_and_b32 s0, s2, s7 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s0 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0 +; GFX8-NEXT: s_lshl_b32 s0, s1, 1 +; GFX8-NEXT: v_and_b32_e32 v2, s7, v2 +; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX8-NEXT: v_and_b32_e32 v0, s7, v0 +; GFX8-NEXT: s_and_b32 s0, s3, s7 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s0 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v1 +; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_readfirstlane_b32 s0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s1, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v2i24: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_sub_i32 s6, 0, 24 +; GFX9-NEXT: s_mov_b32 s7, 0xffffff +; GFX9-NEXT: s_and_b32 s4, s4, s7 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: s_and_b32 s5, s5, s7 +; GFX9-NEXT: v_mul_lo_u32 v2, s6, v1 +; GFX9-NEXT: s_and_b32 s2, s2, s7 +; GFX9-NEXT: v_mul_lo_u32 v3, s6, v0 +; GFX9-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v3 +; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 +; GFX9-NEXT: v_and_b32_e32 v1, s7, v1 +; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_and_b32_e32 v2, s7, v2 +; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 +; GFX9-NEXT: s_lshl_b32 s0, s1, 1 +; GFX9-NEXT: v_and_b32_e32 v0, s7, v0 +; GFX9-NEXT: s_and_b32 s1, s3, s7 +; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 +; GFX9-NEXT: v_and_b32_e32 v2, s7, v2 +; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 +; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: v_and_b32_e32 v4, s1, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: v_and_or_b32 v0, v3, s1, v0 +; GFX9-NEXT: v_and_or_b32 v2, v1, s1, v2 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX9-NEXT: v_or3_b32 v1, v2, v1, v4 +; GFX9-NEXT: v_or3_b32 v0, v0, 0, 0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s1, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v2i24: +; GFX10: ; %bb.0: +; GFX10-NEXT: 
v_cvt_f32_ubyte0_e32 v0, 24 +; GFX10-NEXT: s_sub_i32 s6, 0, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-NEXT: s_lshl_b32 s1, s1, 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v2, s6, v1 +; GFX10-NEXT: v_mul_lo_u32 v3, s6, v0 +; GFX10-NEXT: s_mov_b32 s6, 0xffffff +; GFX10-NEXT: s_and_b32 s4, s4, s6 +; GFX10-NEXT: s_and_b32 s5, s5, s6 +; GFX10-NEXT: s_and_b32 s2, s2, s6 +; GFX10-NEXT: s_and_b32 s3, s3, s6 +; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX10-NEXT: v_mul_hi_u32 v3, v0, v3 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v3 +; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX10-NEXT: v_and_b32_e32 v1, s6, v1 +; GFX10-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX10-NEXT: v_and_b32_e32 v0, s6, v0 +; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 +; GFX10-NEXT: v_and_b32_e32 v3, s6, v3 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3 +; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 +; GFX10-NEXT: s_movk_i32 s0, 0xff +; GFX10-NEXT: v_lshl_or_b32 v0, s1, v3, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v1 +; GFX10-NEXT: s_mov_b32 s1, 8 +; GFX10-NEXT: v_and_b32_sdwa v5, v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_sdwa v3, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX10-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX10-NEXT: v_and_or_b32 v7, v1, s0, v2 +; GFX10-NEXT: v_and_or_b32 v2, v4, s0, v3 +; GFX10-NEXT: v_or3_b32 v0, v7, v5, v0 +; GFX10-NEXT: v_or3_b32 v1, v2, 0, 0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: ; return to shader part epilog + %lhs = bitcast i48 %lhs.arg to <2 x i24> + %rhs = bitcast i48 %rhs.arg to <2 x i24> + %amt = bitcast i48 %amt.arg to <2 x i24> + %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) + %cast.result = bitcast <2 x i24> %result to i48 + ret i48 %cast.result +} + +define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { +; GFX6-LABEL: v_fshr_v2i24: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX6-NEXT: s_sub_i32 s4, 
0, 24 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v8, 24 +; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_mul_lo_u32 v7, s4, v6 +; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX6-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX6-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; GFX6-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX6-NEXT: v_mul_lo_u32 v6, s4, v7 +; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 23, v4 +; GFX6-NEXT: v_and_b32_e32 v9, v9, v8 +; GFX6-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX6-NEXT: v_mul_hi_u32 v6, v7, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX6-NEXT: v_and_b32_e32 v3, v3, v8 +; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX6-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i24: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX8-NEXT: s_sub_i32 s4, 0, 24 +; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v8, 24 +; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX8-NEXT: v_mul_lo_u32 v7, s4, v6 +; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 +; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX8-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX8-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; GFX8-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX8-NEXT: v_mul_lo_u32 v6, s4, v7 +; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 23, v4 +; GFX8-NEXT: v_and_b32_e32 v9, v9, v8 +; GFX8-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX8-NEXT: v_mul_hi_u32 v6, v7, v6 +; 
GFX8-NEXT: v_lshlrev_b32_e32 v0, v9, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 +; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 +; GFX8-NEXT: v_and_b32_e32 v3, v3, v8 +; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 +; GFX8-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX8-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i24: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX9-NEXT: s_sub_i32 s4, 0, 24 +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v8, 24 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX9-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX9-NEXT: v_mul_lo_u32 v7, s4, v6 +; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_hi_u32 v7, v6, v7 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v8 +; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 +; GFX9-NEXT: v_mul_lo_u32 v8, s4, v7 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v8, v7, v8 +; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 +; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 +; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 +; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 +; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i24: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 +; GFX10-NEXT: s_sub_i32 s4, 0, 24 +; GFX10-NEXT: v_mov_b32_e32 v12, 0xffffff +; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 
v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v5, v5, v12 +; GFX10-NEXT: v_and_b32_e32 v2, v2, v12 +; GFX10-NEXT: v_and_b32_e32 v3, v3, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX10-NEXT: v_mul_lo_u32 v8, s4, v6 +; GFX10-NEXT: v_mul_lo_u32 v9, s4, v7 +; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 +; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 +; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 +; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 +; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 +; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 +; GFX10-NEXT: v_cndmask_b32_e32 v11, v4, v6, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 +; GFX10-NEXT: v_and_b32_e32 v4, v11, v12 +; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v11 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5 +; GFX10-NEXT: v_and_b32_e32 v5, v5, v12 +; GFX10-NEXT: v_and_b32_e32 v11, v6, v12 +; GFX10-NEXT: v_and_b32_e32 v4, v7, v12 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3 +; GFX10-NEXT: v_lshl_or_b32 v0, v0, v11, v2 +; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) + ret <2 x i24> %result +} + +define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { +; GFX6-LABEL: s_fshr_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v0, s1 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) + ret i32 %result +} + +define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) { +; GFX6-LABEL: s_fshr_i32_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v0, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i32_5: +; GFX8: ; %bb.0: +; 
GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i32_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i32_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) + ret i32 %result +} + +define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) { +; GFX6-LABEL: s_fshr_i32_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v0, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i32_8: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i32_8: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i32_8: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) + ret i32 %result +} + +define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) { +; GFX6-LABEL: v_fshr_i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) + ret i32 %result +} + +define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) { +; GFX6-LABEL: v_fshr_i32_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i32_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i32_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i32_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 5 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i32 @llvm.fshr.i32(i32 
%lhs, i32 %rhs, i32 5) + ret i32 %result +} + +define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) { +; GFX6-LABEL: v_fshr_i32_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i32_8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i32_8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i32_8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) + ret i32 %result +} + +define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) { +; GFX6-LABEL: v_fshr_i32_ssv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i32_ssv: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i32_ssv: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i32_ssv: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) + %cast.result = bitcast i32 %result to float + ret float %cast.result +} + +define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) { +; GFX6-LABEL: v_fshr_i32_svs: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i32_svs: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i32_svs: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i32_svs: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) + %cast.result = bitcast i32 %result to float + ret float %cast.result +} + +define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { +; GFX6-LABEL: v_fshr_i32_vss: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_mov_b32_e32 v0, s1 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i32_vss: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i32_vss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 +; GFX9-NEXT: ; 
return to shader part epilog +; +; GFX10-LABEL: v_fshr_i32_vss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) + %cast.result = bitcast i32 %result to float + ret float %cast.result +} + +define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { +; GFX6-LABEL: v_fshr_v2i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 +; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) + ret <2 x i32> %result +} + +define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { +; GFX6-LABEL: v_fshr_v3i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 +; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7 +; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v3i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 +; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7 +; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v3i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 +; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7 +; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v3i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 +; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 +; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) + ret <3 x i32> %result +} + +define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { +; GFX6-LABEL: v_fshr_v4i32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 +; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9 +; GFX6-NEXT: v_alignbit_b32 v2, v2, v6, v10 +; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v4i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 +; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9 +; 
GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10 +; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v4i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 +; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9 +; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10 +; GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v4i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 +; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 +; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 +; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) + ret <4 x i32> %result +} + +define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { +; GFX6-LABEL: s_fshr_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b32 s3, s2, 15 +; GFX6-NEXT: s_andn2_b32 s2, 15, s2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000 +; GFX6-NEXT: s_and_b32 s1, s1, 0xffff +; GFX6-NEXT: s_lshr_b32 s1, s1, s2 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s3, s2, 15 +; GFX8-NEXT: s_andn2_b32 s2, 15, s2 +; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s3, s2, 15 +; GFX9-NEXT: s_andn2_b32 s2, 15, s2 +; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s3, s2, 15 +; GFX10-NEXT: s_andn2_b32 s2, 15, s2 +; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s4 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) + ret i16 %result +} + +define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) { +; GFX6-LABEL: s_fshr_i16_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s1, s1, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s0, 11 +; GFX6-NEXT: s_lshr_b32 s1, s1, 4 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i16_4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000 +; GFX8-NEXT: 
s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i16_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i16_4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, 11, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) + ret i16 %result +} + +define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) { +; GFX6-LABEL: s_fshr_i16_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s1, s1, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s0, 10 +; GFX6-NEXT: s_lshr_b32 s1, s1, 5 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i16_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s2, 10, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i16_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s2, 10, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i16_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, 10, 0x100000 +; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshr_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) + ret i16 %result +} + +define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) { +; GFX6-LABEL: v_fshr_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v3, 15, v2 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_bfe_u32 v2, v3, 0, 16 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v3, 
15, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 +; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, v3, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) + ret i16 %result +} + +define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) { +; GFX6-LABEL: v_fshr_i16_4: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 4, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i16_4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i16_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0 +; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i16_4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, 4, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 11, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) + ret i16 %result +} + +define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) { +; GFX6-LABEL: v_fshr_i16_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 10, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 5, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i16_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 10, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i16_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 10, v0 +; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i16_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshrrev_b16_e64 v1, 5, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 10, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) + ret i16 %result +} + +define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { +; GFX6-LABEL: v_fshr_i16_ssv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX6-NEXT: s_and_b32 s0, s1, 0xffff +; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i16_ssv: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i16_ssv: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 +; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i16_ssv: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 +; GFX10-NEXT: v_lshrrev_b16_e64 v0, v0, s1 +; GFX10-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) + %cast.result = bitcast i16 %result to half + ret half %cast.result +} + +define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { +; GFX6-LABEL: v_fshr_i16_svs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b32 s2, s1, 15 +; GFX6-NEXT: s_andn2_b32 s1, 15, s1 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX6-NEXT: s_lshl_b32 s0, s0, s1 +; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i16_svs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s2, s1, 15 +; GFX8-NEXT: s_andn2_b32 s1, 15, s1 +; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s3 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s1 +; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0 +; 
GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i16_svs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s2, s1, 15 +; GFX9-NEXT: s_andn2_b32 s1, 15, s1 +; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s3 +; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX9-NEXT: s_lshl_b32 s0, s0, s1 +; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i16_svs: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b32 s2, s1, 15 +; GFX10-NEXT: s_andn2_b32 s1, 15, s1 +; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000 +; GFX10-NEXT: v_lshrrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: s_lshl_b32 s0, s0, s3 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_lshl_b32 s0, s0, s1 +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) + %cast.result = bitcast i16 %result to half + ret half %cast.result +} + +define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) { +; GFX6-LABEL: v_fshr_i16_vss: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b32 s2, s1, 15 +; GFX6-NEXT: s_andn2_b32 s1, 15, s1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0 +; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 +; GFX6-NEXT: s_and_b32 s0, s0, 0xffff +; GFX6-NEXT: s_lshr_b32 s0, s0, s1 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i16_vss: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s2, s1, 15 +; GFX8-NEXT: s_andn2_b32 s1, 15, s1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000 +; GFX8-NEXT: s_lshr_b32 s0, s0, s1 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i16_vss: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s2, s1, 15 +; GFX9-NEXT: s_andn2_b32 s1, 15, s1 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0 +; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX9-NEXT: s_bfe_u32 s1, s2, 0x100000 +; GFX9-NEXT: s_lshr_b32 s0, s0, s1 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i16_vss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: s_andn2_b32 s2, 15, s1 +; GFX10-NEXT: s_and_b32 s1, s1, 15 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: v_lshlrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: s_lshr_b32 s0, s0, s1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) + %cast.result = bitcast i16 %result to half + ret half %cast.result +} + +define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { +; GFX6-LABEL: s_fshr_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s6, 0xffff +; GFX6-NEXT: s_lshl_b32 s5, s5, 16 +; GFX6-NEXT: s_and_b32 s4, s4, s6 +; GFX6-NEXT: s_or_b32 s4, s5, s4 +; GFX6-NEXT: s_mov_b32 s5, 0xf000f +; GFX6-NEXT: s_and_b32 s7, s4, s5 +; GFX6-NEXT: s_xor_b32 s4, s4, -1 +; GFX6-NEXT: s_and_b32 s4, s4, s5 +; GFX6-NEXT: s_lshr_b32 s5, s4, 16 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_lshl_b32 
s1, s1, s5 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s4, s4, s6 +; GFX6-NEXT: s_lshl_b32 s0, s0, s4 +; GFX6-NEXT: s_and_b32 s1, s1, s6 +; GFX6-NEXT: s_and_b32 s0, s0, s6 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s1, s7, 16 +; GFX6-NEXT: s_and_b32 s3, s3, s6 +; GFX6-NEXT: s_lshr_b32 s1, s3, s1 +; GFX6-NEXT: s_and_b32 s4, s7, s6 +; GFX6-NEXT: s_and_b32 s2, s2, s6 +; GFX6-NEXT: s_lshr_b32 s2, s2, s4 +; GFX6-NEXT: s_and_b32 s1, s1, s6 +; GFX6-NEXT: s_and_b32 s2, s2, s6 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s3, 0xf000f +; GFX8-NEXT: s_and_b32 s4, s2, s3 +; GFX8-NEXT: s_xor_b32 s2, s2, -1 +; GFX8-NEXT: s_and_b32 s2, s2, s3 +; GFX8-NEXT: s_lshr_b32 s3, s0, 16 +; GFX8-NEXT: s_bfe_u32 s5, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s5 +; GFX8-NEXT: s_lshl_b32 s3, s3, s5 +; GFX8-NEXT: s_lshr_b32 s5, s2, 16 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_bfe_u32 s2, s5, 0x100000 +; GFX8-NEXT: s_lshl_b32 s2, s3, s2 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_lshr_b32 s3, s4, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s2 +; GFX8-NEXT: s_lshr_b32 s2, s1, 16 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_lshr_b32 s2, s2, s3 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s4 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s3, 0xf000f +; GFX9-NEXT: s_and_b32 s4, s2, s3 +; GFX9-NEXT: s_andn2_b32 s2, s3, s2 +; GFX9-NEXT: s_lshr_b32 s3, s0, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX9-NEXT: s_lshl_b32 s3, s3, 1 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX9-NEXT: s_lshr_b32 s3, s0, 16 +; GFX9-NEXT: s_lshr_b32 s5, s2, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, s2 +; GFX9-NEXT: s_lshl_b32 s2, s3, s5 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX9-NEXT: s_lshr_b32 s2, s1, 16 +; GFX9-NEXT: s_lshr_b32 s3, s4, 16 +; GFX9-NEXT: s_lshr_b32 s1, s1, s4 +; GFX9-NEXT: s_lshr_b32 s2, s2, s3 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_mov_b32 s3, 0xf000f +; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX10-NEXT: s_lshl_b32 s4, s4, 1 +; GFX10-NEXT: s_and_b32 s5, s2, s3 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_andn2_b32 s2, s3, s2 +; GFX10-NEXT: s_lshr_b32 s3, s0, 16 +; GFX10-NEXT: s_lshr_b32 s4, s2, 16 +; GFX10-NEXT: s_lshl_b32 s0, s0, s2 +; GFX10-NEXT: s_lshl_b32 s2, s3, s4 +; GFX10-NEXT: s_lshr_b32 s3, s1, 16 +; GFX10-NEXT: s_lshr_b32 s4, s5, 16 +; GFX10-NEXT: s_lshr_b32 s1, s1, s5 +; GFX10-NEXT: s_lshr_b32 s3, s3, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b32 s0, s0, s1 +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> 
%lhs, <2 x i16> %rhs, <2 x i16> %amt) + %cast = bitcast <2 x i16> %result to i32 + ret i32 %cast +} + +define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { +; GFX6-LABEL: v_fshr_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 +; GFX6-NEXT: s_mov_b32 s4, 0xf000f +; GFX6-NEXT: v_and_b32_e32 v5, s4, v4 +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX6-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX6-NEXT: s_mov_b32 s5, 0xffff +; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v6, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX6-NEXT: v_and_b32_e32 v3, s5, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v3 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v5 +; GFX6-NEXT: v_and_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, 0xf000f +; GFX8-NEXT: v_and_b32_e32 v3, s4, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: v_mov_b32_e32 v5, 1 +; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v0 +; GFX8-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v4, v2, v4 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s4, 0xf000f +; GFX9-NEXT: v_and_b32_e32 v3, s4, v2 +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 +; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 +; GFX10-NEXT: s_mov_b32 s4, 0xf000f +; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX10-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 +; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> 
%amt) + ret <2 x i16> %result +} + +define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { +; GFX6-LABEL: v_fshr_v2i16_4_8: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: s_mov_b32 s4, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 7, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 +; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 4, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i16_4_8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v3, 1 +; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_mov_b32_e32 v3, 7 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b16_e32 v2, 11, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, 4, v1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i16_4_8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s4, 0x80004 +; GFX9-NEXT: s_mov_b32 s5, 0x7000b +; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_pk_lshlrev_b16 v0, s5, v0 +; GFX9-NEXT: v_pk_lshrrev_b16 v1, s4, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i16_4_8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x7000b, v0 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> ) + ret <2 x i16> %result +} + +define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { +; GFX6-LABEL: v_fshr_v2i16_ssv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: s_mov_b32 s4, 0xf000f +; GFX6-NEXT: v_and_b32_e32 v1, s4, v0 +; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: s_mov_b32 s5, 0xffff +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX6-NEXT: 
v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: s_and_b32 s0, s2, s5 +; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 +; GFX6-NEXT: s_and_b32 s0, s3, s5 +; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_and_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_v2i16_ssv: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s2, 0xf000f +; GFX8-NEXT: v_and_b32_e32 v1, s2, v0 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, s2, v0 +; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s3 +; GFX8-NEXT: s_lshl_b32 s2, s2, s3 +; GFX8-NEXT: v_lshlrev_b16_e64 v2, v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: s_lshr_b32 s0, s1, 16 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_e64 v2, v1, s1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_v2i16_ssv: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s2, 0xf000f +; GFX9-NEXT: v_and_b32_e32 v1, s2, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, s2, v0 +; GFX9-NEXT: s_lshr_b32 s2, s0, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX9-NEXT: s_lshl_b32 s2, s2, 1 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0 +; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_v2i16_ssv: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 +; GFX10-NEXT: s_mov_b32 s2, 0xf000f +; GFX10-NEXT: s_lshr_b32 s3, s0, 16 +; GFX10-NEXT: v_and_b32_e32 v0, s2, v0 +; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX10-NEXT: v_and_b32_e32 v1, s2, v1 +; GFX10-NEXT: s_lshl_b32 s2, s3, 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1 +; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0 +; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) + %cast = bitcast <2 x i16> %result to float + ret float %cast +} + +define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { +; GFX6-LABEL: v_fshr_v2i16_svs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s4, 0xffff +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_and_b32 s2, s2, s4 +; GFX6-NEXT: s_or_b32 s2, s3, s2 +; GFX6-NEXT: s_mov_b32 s3, 0xf000f +; GFX6-NEXT: s_and_b32 s5, s2, s3 +; GFX6-NEXT: s_xor_b32 s2, s2, -1 +; GFX6-NEXT: s_and_b32 s2, s2, s3 +; GFX6-NEXT: s_lshr_b32 s3, s2, 16 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_lshl_b32 s1, s1, s3 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: s_and_b32 s0, s0, s4 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: 
s_lshr_b32 s1, s5, 16 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1 +; GFX6-NEXT: s_and_b32 s2, s5, s4 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s2, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_v2i16_svs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s2, 0xf000f +; GFX8-NEXT: s_and_b32 s3, s1, s2 +; GFX8-NEXT: s_xor_b32 s1, s1, -1 +; GFX8-NEXT: s_and_b32 s1, s1, s2 +; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_lshl_b32 s2, s2, s4 +; GFX8-NEXT: s_lshr_b32 s4, s1, 16 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s1 +; GFX8-NEXT: s_bfe_u32 s1, s4, 0x100000 +; GFX8-NEXT: s_lshl_b32 s1, s2, s1 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s1 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, s3, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_v2i16_svs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s2, 0xf000f +; GFX9-NEXT: s_and_b32 s3, s1, s2 +; GFX9-NEXT: s_andn2_b32 s1, s2, s1 +; GFX9-NEXT: s_lshr_b32 s2, s0, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX9-NEXT: s_lshl_b32 s2, s2, 1 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX9-NEXT: s_lshr_b32 s2, s0, 16 +; GFX9-NEXT: s_lshr_b32 s4, s1, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, s1 +; GFX9-NEXT: s_lshl_b32 s1, s2, s4 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX9-NEXT: v_pk_lshrrev_b16 v0, s3, v0 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_v2i16_svs: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s3, s0, 16 +; GFX10-NEXT: s_mov_b32 s2, 0xf000f +; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 +; GFX10-NEXT: s_lshl_b32 s3, s3, 1 +; GFX10-NEXT: s_and_b32 s4, s1, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX10-NEXT: s_andn2_b32 s1, s2, s1 +; GFX10-NEXT: s_lshr_b32 s2, s0, 16 +; GFX10-NEXT: s_lshr_b32 s3, s1, 16 +; GFX10-NEXT: v_pk_lshrrev_b16 v0, s4, v0 +; GFX10-NEXT: s_lshl_b32 s0, s0, s1 +; GFX10-NEXT: s_lshl_b32 s1, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) + %cast = bitcast <2 x i16> %result to float + ret float %cast +} + +define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { +; GFX6-LABEL: v_fshr_v2i16_vss: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s4, 0xffff +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_and_b32 s2, s2, s4 +; GFX6-NEXT: s_or_b32 s2, s3, s2 +; GFX6-NEXT: s_mov_b32 s3, 0xf000f +; GFX6-NEXT: s_and_b32 s5, s2, s3 +; GFX6-NEXT: s_xor_b32 s2, s2, -1 +; GFX6-NEXT: s_and_b32 s2, s2, s3 +; GFX6-NEXT: s_lshr_b32 s3, s2, 16 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: s_and_b32 s2, s2, s4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 
s2, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: s_lshr_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, s3, v1 +; GFX6-NEXT: s_lshr_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s3, s5, s4 +; GFX6-NEXT: s_and_b32 s0, s0, s4 +; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: s_lshr_b32 s0, s0, s3 +; GFX6-NEXT: s_and_b32 s1, s1, s4 +; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: s_and_b32 s0, s0, s4 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_v2i16_vss: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s2, 0xf000f +; GFX8-NEXT: s_and_b32 s3, s1, s2 +; GFX8-NEXT: s_xor_b32 s1, s1, -1 +; GFX8-NEXT: s_and_b32 s1, s1, s2 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 1 +; GFX8-NEXT: s_lshr_b32 s2, s1, 16 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: s_lshr_b32 s2, s3, 16 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, s1, v1 +; GFX8-NEXT: s_lshr_b32 s1, s0, 16 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshr_b32 s1, s1, s2 +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_lshr_b32 s0, s0, s3 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_v2i16_vss: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s2, 0xf000f +; GFX9-NEXT: s_and_b32 s3, s1, s2 +; GFX9-NEXT: s_andn2_b32 s1, s2, s1 +; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0 +; GFX9-NEXT: s_lshr_b32 s1, s0, 16 +; GFX9-NEXT: s_lshr_b32 s2, s3, 16 +; GFX9-NEXT: s_lshr_b32 s0, s0, s3 +; GFX9-NEXT: s_lshr_b32 s1, s1, s2 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_v2i16_vss: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 s2, 0xf000f +; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX10-NEXT: s_and_b32 s3, s1, s2 +; GFX10-NEXT: s_andn2_b32 s1, s2, s1 +; GFX10-NEXT: s_lshr_b32 s2, s0, 16 +; GFX10-NEXT: s_lshr_b32 s4, s3, 16 +; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0 +; GFX10-NEXT: s_lshr_b32 s0, s0, s3 +; GFX10-NEXT: s_lshr_b32 s1, s2, s4 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) + %cast = bitcast <2 x i16> %result to float + ret float %cast +} + +define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { + %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) + %cast = bitcast <3 x i16> %result to i48 + ret i48 %cast +} + +define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { + %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, 
<3 x i16> %rhs, <3 x i16> %amt) + %cast.result = bitcast <3 x i16> %result to <3 x half> + ret <3 x half> %cast.result +} + +define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { +; GFX6-LABEL: s_fshr_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s12, 0xffff +; GFX6-NEXT: s_lshl_b32 s9, s9, 16 +; GFX6-NEXT: s_and_b32 s8, s8, s12 +; GFX6-NEXT: s_or_b32 s8, s9, s8 +; GFX6-NEXT: s_lshl_b32 s9, s11, 16 +; GFX6-NEXT: s_and_b32 s10, s10, s12 +; GFX6-NEXT: s_or_b32 s9, s9, s10 +; GFX6-NEXT: s_mov_b32 s10, 0xf000f +; GFX6-NEXT: s_and_b32 s11, s8, s10 +; GFX6-NEXT: s_xor_b32 s8, s8, -1 +; GFX6-NEXT: s_and_b32 s8, s8, s10 +; GFX6-NEXT: s_lshr_b32 s13, s8, 16 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_lshl_b32 s1, s1, s13 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s8, s8, s12 +; GFX6-NEXT: s_lshl_b32 s0, s0, s8 +; GFX6-NEXT: s_and_b32 s1, s1, s12 +; GFX6-NEXT: s_and_b32 s0, s0, s12 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshr_b32 s1, s11, 16 +; GFX6-NEXT: s_and_b32 s5, s5, s12 +; GFX6-NEXT: s_lshr_b32 s1, s5, s1 +; GFX6-NEXT: s_and_b32 s8, s11, s12 +; GFX6-NEXT: s_and_b32 s4, s4, s12 +; GFX6-NEXT: s_lshr_b32 s4, s4, s8 +; GFX6-NEXT: s_and_b32 s1, s1, s12 +; GFX6-NEXT: s_and_b32 s4, s4, s12 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s1, s4, s1 +; GFX6-NEXT: s_xor_b32 s4, s9, -1 +; GFX6-NEXT: s_and_b32 s4, s4, s10 +; GFX6-NEXT: s_lshr_b32 s5, s4, 16 +; GFX6-NEXT: s_lshl_b32 s3, s3, 1 +; GFX6-NEXT: s_lshl_b32 s3, s3, s5 +; GFX6-NEXT: s_lshl_b32 s2, s2, 1 +; GFX6-NEXT: s_and_b32 s4, s4, s12 +; GFX6-NEXT: s_lshl_b32 s2, s2, s4 +; GFX6-NEXT: s_and_b32 s3, s3, s12 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s1, s9, s10 +; GFX6-NEXT: s_and_b32 s2, s2, s12 +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_lshr_b32 s3, s1, 16 +; GFX6-NEXT: s_and_b32 s1, s1, s12 +; GFX6-NEXT: s_and_b32 s4, s6, s12 +; GFX6-NEXT: s_lshr_b32 s1, s4, s1 +; GFX6-NEXT: s_and_b32 s4, s7, s12 +; GFX6-NEXT: s_lshr_b32 s3, s4, s3 +; GFX6-NEXT: s_and_b32 s3, s3, s12 +; GFX6-NEXT: s_and_b32 s1, s1, s12 +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s3 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s6, 0xf000f +; GFX8-NEXT: s_and_b32 s7, s4, s6 +; GFX8-NEXT: s_xor_b32 s4, s4, -1 +; GFX8-NEXT: s_and_b32 s4, s4, s6 +; GFX8-NEXT: s_bfe_u32 s9, 1, 0x100000 +; GFX8-NEXT: s_lshr_b32 s8, s0, 16 +; GFX8-NEXT: s_lshr_b32 s10, s4, 16 +; GFX8-NEXT: s_lshl_b32 s0, s0, s9 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_lshl_b32 s8, s8, s9 +; GFX8-NEXT: s_bfe_u32 s4, s10, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s8, s4 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s4 +; GFX8-NEXT: s_lshr_b32 s4, s2, 16 +; GFX8-NEXT: s_lshr_b32 s8, s7, 16 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 +; GFX8-NEXT: s_lshr_b32 s2, s2, s7 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s7, s8, 0x100000 +; GFX8-NEXT: s_lshr_b32 s4, s4, s7 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_or_b32 s2, s2, s4 +; GFX8-NEXT: s_xor_b32 s4, s5, -1 +; GFX8-NEXT: s_and_b32 s4, s4, s6 +; 
GFX8-NEXT: s_or_b32 s0, s0, s2 +; GFX8-NEXT: s_and_b32 s2, s5, s6 +; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_lshr_b32 s6, s4, 16 +; GFX8-NEXT: s_lshl_b32 s1, s1, s9 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_lshl_b32 s1, s1, s4 +; GFX8-NEXT: s_lshl_b32 s5, s5, s9 +; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s5, s4 +; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_or_b32 s1, s1, s4 +; GFX8-NEXT: s_lshr_b32 s4, s3, 16 +; GFX8-NEXT: s_lshr_b32 s5, s2, 16 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshr_b32 s2, s3, s2 +; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 +; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 +; GFX8-NEXT: s_lshr_b32 s3, s3, s4 +; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_or_b32 s2, s2, s3 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_lshr_b32 s9, s0, 16 +; GFX9-NEXT: s_mov_b32 s8, 0x10001 +; GFX9-NEXT: s_mov_b32 s6, 0xf000f +; GFX9-NEXT: s_lshl_b32 s0, s0, s8 +; GFX9-NEXT: s_lshl_b32 s9, s9, 1 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s9 +; GFX9-NEXT: s_and_b32 s7, s4, s6 +; GFX9-NEXT: s_andn2_b32 s4, s6, s4 +; GFX9-NEXT: s_lshr_b32 s9, s0, 16 +; GFX9-NEXT: s_lshr_b32 s10, s4, 16 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s4, s9, s10 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX9-NEXT: s_lshr_b32 s4, s2, 16 +; GFX9-NEXT: s_lshr_b32 s9, s7, 16 +; GFX9-NEXT: s_lshr_b32 s2, s2, s7 +; GFX9-NEXT: s_lshr_b32 s4, s4, s9 +; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 +; GFX9-NEXT: s_or_b32 s0, s0, s2 +; GFX9-NEXT: s_and_b32 s2, s5, s6 +; GFX9-NEXT: s_andn2_b32 s4, s6, s5 +; GFX9-NEXT: s_lshr_b32 s5, s1, 16 +; GFX9-NEXT: s_lshl_b32 s1, s1, s8 +; GFX9-NEXT: s_lshl_b32 s5, s5, 1 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 +; GFX9-NEXT: s_lshr_b32 s5, s1, 16 +; GFX9-NEXT: s_lshr_b32 s6, s4, 16 +; GFX9-NEXT: s_lshl_b32 s1, s1, s4 +; GFX9-NEXT: s_lshl_b32 s4, s5, s6 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 +; GFX9-NEXT: s_lshr_b32 s5, s2, 16 +; GFX9-NEXT: s_lshr_b32 s4, s3, 16 +; GFX9-NEXT: s_lshr_b32 s2, s3, s2 +; GFX9-NEXT: s_lshr_b32 s3, s4, s5 +; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX9-NEXT: s_or_b32 s1, s1, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v4i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_mov_b32 s7, 0x10001 +; GFX10-NEXT: s_mov_b32 s6, 0xf000f +; GFX10-NEXT: s_lshl_b32 s0, s0, s7 +; GFX10-NEXT: s_lshl_b32 s8, s8, 1 +; GFX10-NEXT: s_and_b32 s9, s4, s6 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 +; GFX10-NEXT: s_andn2_b32 s4, s6, s4 +; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_lshr_b32 s10, s4, 16 +; GFX10-NEXT: s_lshl_b32 s0, s0, s4 +; GFX10-NEXT: s_lshl_b32 s4, s8, s10 +; GFX10-NEXT: s_lshr_b32 s8, s2, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_lshr_b32 s4, s1, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, s7 +; GFX10-NEXT: s_lshl_b32 s4, s4, 1 +; GFX10-NEXT: s_and_b32 s7, s5, s6 +; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 +; GFX10-NEXT: s_andn2_b32 s4, s6, s5 +; GFX10-NEXT: s_lshr_b32 s5, s1, 16 +; GFX10-NEXT: s_lshr_b32 s6, s4, 16 +; GFX10-NEXT: s_lshr_b32 s10, s9, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, s4 +; GFX10-NEXT: s_lshl_b32 s4, s5, s6 +; GFX10-NEXT: s_lshr_b32 s5, s3, 16 +; GFX10-NEXT: s_lshr_b32 s6, s7, 
16 +; GFX10-NEXT: s_lshr_b32 s2, s2, s9 +; GFX10-NEXT: s_lshr_b32 s8, s8, s10 +; GFX10-NEXT: s_lshr_b32 s3, s3, s7 +; GFX10-NEXT: s_lshr_b32 s5, s5, s6 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 +; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 +; GFX10-NEXT: s_or_b32 s0, s0, s2 +; GFX10-NEXT: s_or_b32 s1, s1, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) + %cast.result = bitcast <4 x i16> %result to <2 x i32> + ret <2 x i32> %cast.result +} + +define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) { +; GFX6-LABEL: v_fshr_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v12, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX6-NEXT: v_and_b32_e32 v8, v8, v12 +; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 +; GFX6-NEXT: s_mov_b32 s4, 0xf000f +; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 +; GFX6-NEXT: v_and_b32_e32 v11, s4, v8 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX6-NEXT: v_and_b32_e32 v8, s4, v8 +; GFX6-NEXT: v_and_b32_e32 v10, v10, v12 +; GFX6-NEXT: s_mov_b32 s5, 0xffff +; GFX6-NEXT: v_lshrrev_b32_e32 v12, 16, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v12, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v8, s5, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v11 +; GFX6-NEXT: v_and_b32_e32 v5, s5, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v5 +; GFX6-NEXT: v_and_b32_e32 v8, s5, v11 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX6-NEXT: v_mov_b32_e32 v10, 0xf000f +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v9 +; GFX6-NEXT: v_and_b32_e32 v4, v4, v10 +; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, v5, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_and_b32_e32 v3, s5, v3 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, v9, v10 +; GFX6-NEXT: v_and_b32_e32 v2, s5, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v4 +; GFX6-NEXT: v_and_b32_e32 v4, s5, v7 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_and_b32_e32 v3, s5, v3 +; GFX6-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, 0xf000f +; GFX8-NEXT: v_and_b32_e32 v7, s4, v4 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX8-NEXT: v_mov_b32_e32 v9, 1 +; GFX8-NEXT: 
v_lshlrev_b16_e32 v8, 1, v0 +; GFX8-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v8, v4, v8 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v2 +; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-NEXT: v_mov_b32_e32 v6, 0xf000f +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_and_b32_e32 v2, v5, v6 +; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1 +; GFX8-NEXT: v_and_b32_e32 v4, v4, v6 +; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v5, v4, v5 +; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v2, v3 +; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v1, v5, v1 +; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s4, 0xf000f +; GFX9-NEXT: v_and_b32_e32 v6, s4, v4 +; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 +; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 +; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_and_b32_e32 v2, s4, v5 +; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] +; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1 +; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v4i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 +; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 +; GFX10-NEXT: s_mov_b32 s4, 0xf000f +; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] +; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 +; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 +; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 +; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) + %cast.result = bitcast <4 x i16> %result to <4 x half> + ret <4 x half> %cast.result +} + +define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { +; GFX6-LABEL: s_fshr_i64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63 +; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 +; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 +; GFX6-NEXT: 
s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63 +; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 +; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63 +; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 +; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) + ret i64 %result +} + +define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) { +; GFX6-LABEL: s_fshr_i64_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_lshl_b32 s1, s0, 26 +; GFX6-NEXT: s_mov_b32 s0, 0 +; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i64_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: s_lshl_b32 s1, s0, 26 +; GFX8-NEXT: s_mov_b32 s0, 0 +; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i64_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-NEXT: s_lshl_b32 s1, s0, 26 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i64_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 +; GFX10-NEXT: s_lshl_b32 s1, s0, 26 +; GFX10-NEXT: s_mov_b32 s0, 0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) + ret i64 %result +} + +define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) { +; GFX6-LABEL: s_fshr_i64_32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX6-NEXT: s_mov_b32 s2, s3 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 +; GFX6-NEXT: s_mov_b32 s3, 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i64_32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: s_mov_b32 s2, s3 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 +; GFX8-NEXT: s_mov_b32 s3, 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i64_32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: s_mov_b32 s2, s3 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader 
part epilog +; +; GFX10-LABEL: s_fshr_i64_32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: s_mov_b32 s2, s3 +; GFX10-NEXT: s_mov_b32 s3, 0 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) + ret i64 %result +} + +define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) { +; GFX6-LABEL: s_fshr_i64_48: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX6-NEXT: s_lshr_b32 s2, s3, 16 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 15 +; GFX6-NEXT: s_mov_b32 s3, 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i64_48: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: s_lshr_b32 s2, s3, 16 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 15 +; GFX8-NEXT: s_mov_b32 s3, 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i64_48: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: s_lshr_b32 s2, s3, 16 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 15 +; GFX9-NEXT: s_mov_b32 s3, 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i64_48: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: s_lshr_b32 s2, s3, 16 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 15 +; GFX10-NEXT: s_mov_b32 s3, 0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) + ret i64 %result +} + +define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) { +; GFX6-LABEL: v_fshr_i64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v5, 63, v4 +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v5, 63, v4 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] +; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 +; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] +; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4 +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 +; 
GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v7, 63, v5 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] +; GFX10-NEXT: v_lshlrev_b64 v[0:1], v7, v[0:1] +; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) + ret i64 %result +} + +define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) { +; GFX6-LABEL: v_fshr_i64_5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v0 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 26, v4 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i64_5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 1, v0 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 26, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i64_5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 1, v0 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 26, v4 +; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i64_5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 26, v4 +; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) + ret i64 %result +} + +define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { +; GFX6-LABEL: v_fshr_i64_32: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i64_32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i64_32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] +; GFX9-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i64_32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] +; GFX10-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) + ret i64 %result +} + +define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) { +; GFX6-LABEL: v_fshr_i64_48: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 15 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i64_48: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 15, v[0:1] +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i64_48: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 15, v[0:1] +; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i64_48: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 15, v[0:1] +; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) + ret i64 %result +} + +define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { +; GFX6-LABEL: v_fshr_i64_ssv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_and_b32_e32 v2, 63, v0 +; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0 +; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i64_ssv: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_and_b32_e32 v2, 63, v0 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] +; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i64_ssv: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_and_b32_e32 v2, 63, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] +; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i64_ssv: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] +; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] +; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX10-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) + %cast = bitcast i64 %result to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) { +; GFX6-LABEL: v_fshr_i64_svs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX6-NEXT: ; return to 
shader part epilog +; +; GFX8-LABEL: v_fshr_i64_svs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i64_svs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i64_svs: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) + %cast = bitcast i64 %result to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { +; GFX6-LABEL: v_fshr_i64_vss: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i64_vss: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] +; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i64_vss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 +; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] +; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] +; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i64_vss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3] +; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] +; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX10-NEXT: ; return to shader part epilog + %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) + %cast = bitcast i64 %result to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { +; GFX6-LABEL: s_fshr_v2i64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63 +; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; 
GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63 +; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] +; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 +; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v2i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63 +; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 +; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 +; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63 +; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] +; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 +; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v2i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63 +; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63 +; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] +; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 +; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v2i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9] +; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 +; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 +; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] +; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 +; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63 +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 +; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) + ret <2 x i64> %result +} + +define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { +; GFX6-LABEL: v_fshr_v2i64: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_and_b32_e32 v9, 63, v8 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 +; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 +; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8 +; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v9, 63, v8 +; GFX8-NEXT: 
v_xor_b32_e32 v8, -1, v8 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] +; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] +; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 +; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] +; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 +; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] +; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] +; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 +; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] +; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] +; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 +; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 +; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] +; GFX10-NEXT: v_and_b32_e32 v19, 63, v8 +; GFX10-NEXT: v_and_b32_e32 v15, 63, v9 +; GFX10-NEXT: v_and_b32_e32 v9, 63, v11 +; GFX10-NEXT: v_and_b32_e32 v13, 63, v10 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_lshrrev_b64 v[4:5], v19, v[4:5] +; GFX10-NEXT: v_lshlrev_b64 v[11:12], v15, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[15:16], v9, v[2:3] +; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, v[6:7] +; GFX10-NEXT: v_or_b32_e32 v0, v11, v4 +; GFX10-NEXT: v_or_b32_e32 v1, v12, v5 +; GFX10-NEXT: v_or_b32_e32 v2, v15, v6 +; GFX10-NEXT: v_or_b32_e32 v3, v16, v7 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) + ret <2 x i64> %result +} + +define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { +; GFX6-LABEL: s_fshr_i128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s10, 0x7f +; GFX6-NEXT: s_mov_b32 s11, 0 +; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] +; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX6-NEXT: s_sub_i32 s9, 1, 64 +; GFX6-NEXT: s_sub_i32 s13, 64, 1 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s18, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s19, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[14:15], s[0:1], s13 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX6-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX6-NEXT: s_and_b32 s9, s18, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX6-NEXT: s_and_b32 s9, s19, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: 
s_sub_i32 s13, s8, 64 +; GFX6-NEXT: s_sub_i32 s9, 64, s8 +; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_cselect_b32 s16, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cselect_b32 s17, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s8 +; GFX6-NEXT: s_lshr_b64 s[14:15], s[10:11], s9 +; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 +; GFX6-NEXT: s_lshl_b64 s[10:11], s[10:11], s13 +; GFX6-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX6-NEXT: s_and_b32 s13, s16, 1 +; GFX6-NEXT: s_cmp_lg_u32 s13, 0 +; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX6-NEXT: s_and_b32 s10, s17, 1 +; GFX6-NEXT: s_cmp_lg_u32 s10, 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] +; GFX6-NEXT: s_sub_i32 s14, s12, 64 +; GFX6-NEXT: s_sub_i32 s13, 64, s12 +; GFX6-NEXT: s_cmp_lt_u32 s12, 64 +; GFX6-NEXT: s_cselect_b32 s15, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s12, 0 +; GFX6-NEXT: s_cselect_b32 s16, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX6-NEXT: s_and_b32 s12, s15, 1 +; GFX6-NEXT: s_cmp_lg_u32 s12, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] +; GFX6-NEXT: s_and_b32 s10, s16, 1 +; GFX6-NEXT: s_cmp_lg_u32 s10, 0 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_cmp_lg_u32 s12, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_movk_i32 s10, 0x7f +; GFX8-NEXT: s_mov_b32 s11, 0 +; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] +; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX8-NEXT: s_sub_i32 s9, 1, 64 +; GFX8-NEXT: s_sub_i32 s13, 64, 1 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s18, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s19, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[14:15], s[0:1], s13 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX8-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX8-NEXT: s_and_b32 s9, s18, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX8-NEXT: s_and_b32 s9, s19, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s13, s8, 64 +; GFX8-NEXT: s_sub_i32 s9, 64, s8 +; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_cselect_b32 s16, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cselect_b32 s17, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s8 +; GFX8-NEXT: s_lshr_b64 s[14:15], s[10:11], s9 +; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 +; GFX8-NEXT: s_lshl_b64 s[10:11], s[10:11], s13 +; GFX8-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX8-NEXT: s_and_b32 s13, s16, 1 +; GFX8-NEXT: s_cmp_lg_u32 s13, 0 +; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX8-NEXT: s_and_b32 s10, s17, 1 +; GFX8-NEXT: s_cmp_lg_u32 s10, 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] +; GFX8-NEXT: s_sub_i32 s14, s12, 64 +; GFX8-NEXT: s_sub_i32 s13, 64, s12 +; GFX8-NEXT: s_cmp_lt_u32 s12, 64 +; GFX8-NEXT: s_cselect_b32 s15, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s12, 0 +; GFX8-NEXT: s_cselect_b32 s16, 1, 0 +; 
GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX8-NEXT: s_and_b32 s12, s15, 1 +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] +; GFX8-NEXT: s_and_b32 s10, s16, 1 +; GFX8-NEXT: s_cmp_lg_u32 s10, 0 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_movk_i32 s10, 0x7f +; GFX9-NEXT: s_mov_b32 s11, 0 +; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] +; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] +; GFX9-NEXT: s_sub_i32 s9, 1, 64 +; GFX9-NEXT: s_sub_i32 s13, 64, 1 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s18, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s19, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[14:15], s[0:1], s13 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX9-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX9-NEXT: s_and_b32 s9, s18, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX9-NEXT: s_and_b32 s9, s19, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s13, s8, 64 +; GFX9-NEXT: s_sub_i32 s9, 64, s8 +; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_cselect_b32 s16, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cselect_b32 s17, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s8 +; GFX9-NEXT: s_lshr_b64 s[14:15], s[10:11], s9 +; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 +; GFX9-NEXT: s_lshl_b64 s[10:11], s[10:11], s13 +; GFX9-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] +; GFX9-NEXT: s_and_b32 s13, s16, 1 +; GFX9-NEXT: s_cmp_lg_u32 s13, 0 +; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX9-NEXT: s_and_b32 s10, s17, 1 +; GFX9-NEXT: s_cmp_lg_u32 s10, 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] +; GFX9-NEXT: s_sub_i32 s14, s12, 64 +; GFX9-NEXT: s_sub_i32 s13, 64, s12 +; GFX9-NEXT: s_cmp_lt_u32 s12, 64 +; GFX9-NEXT: s_cselect_b32 s15, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s12, 0 +; GFX9-NEXT: s_cselect_b32 s16, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 +; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX9-NEXT: s_and_b32 s12, s15, 1 +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] +; GFX9-NEXT: s_and_b32 s10, s16, 1 +; GFX9-NEXT: s_cmp_lg_u32 s10, 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_movk_i32 s10, 0x7f +; GFX10-NEXT: s_mov_b32 s11, 0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] +; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], 
s[8:9] +; GFX10-NEXT: s_sub_i32 s9, 1, 64 +; GFX10-NEXT: s_sub_i32 s10, 64, 1 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: s_cselect_b32 s13, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_cselect_b32 s18, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 +; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 1 +; GFX10-NEXT: s_and_b32 s13, s13, 1 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], 1 +; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX10-NEXT: s_cmp_lg_u32 s13, 0 +; GFX10-NEXT: s_cselect_b64 s[14:15], s[16:17], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX10-NEXT: s_and_b32 s9, s18, 1 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX10-NEXT: s_sub_i32 s13, s8, 64 +; GFX10-NEXT: s_sub_i32 s2, 64, s8 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_cselect_b32 s16, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cselect_b32 s17, 1, 0 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[0:1], s8 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[14:15], s2 +; GFX10-NEXT: s_and_b32 s16, s16, 1 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] +; GFX10-NEXT: s_lshl_b64 s[8:9], s[14:15], s8 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 +; GFX10-NEXT: s_cmp_lg_u32 s16, 0 +; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[10:11] +; GFX10-NEXT: s_and_b32 s10, s17, 1 +; GFX10-NEXT: s_cmp_lg_u32 s10, 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[0:1], s[2:3] +; GFX10-NEXT: s_sub_i32 s14, s12, 64 +; GFX10-NEXT: s_sub_i32 s10, 64, s12 +; GFX10-NEXT: s_cmp_lt_u32 s12, 64 +; GFX10-NEXT: s_cselect_b32 s15, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s12, 0 +; GFX10-NEXT: s_cselect_b32 s16, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 +; GFX10-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] +; GFX10-NEXT: s_and_b32 s10, s15, 1 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX10-NEXT: s_cmp_lg_u32 s10, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] +; GFX10-NEXT: s_and_b32 s6, s16, 1 +; GFX10-NEXT: s_cmp_lg_u32 s6, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] +; GFX10-NEXT: s_cmp_lg_u32 s10, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 +; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX10-NEXT: ; return to shader part epilog + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) + ret i128 %result +} + +define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) { +; GFX6-LABEL: v_fshr_i128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_movk_i32 s4, 0x7f +; GFX6-NEXT: v_and_b32_e32 v14, s4, v8 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX6-NEXT: v_and_b32_e32 v15, s4, v8 +; GFX6-NEXT: s_sub_i32 s5, 64, 1 +; GFX6-NEXT: s_sub_i32 s4, 1, 64 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s6, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], s5 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], 1 +; GFX6-NEXT: v_lshl_b64 v[12:13], v[0:1], 1 +; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s4 +; GFX6-NEXT: s_and_b32 s4, 1, s6 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX6-NEXT: 
v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v15 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[8:9], v2 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[0:1], v15 +; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15 +; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15 +; GFX6-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[8:9], v16 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 +; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 +; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 +; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14 +; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX6-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_movk_i32 s4, 0x7f +; GFX8-NEXT: v_and_b32_e32 v14, s4, v8 +; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX8-NEXT: v_and_b32_e32 v15, s4, v8 +; GFX8-NEXT: s_sub_i32 s5, 64, 1 +; GFX8-NEXT: s_sub_i32 s4, 1, 64 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s5, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3] +; GFX8-NEXT: v_lshlrev_b64 v[12:13], 1, v[0:1] +; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] +; GFX8-NEXT: s_and_b32 s4, 1, s6 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v15 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, v[8:9] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15 +; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] +; GFX8-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v16, v[8:9] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 +; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 
v13, 0, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 +; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] +; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s4, 0x7f +; GFX9-NEXT: v_and_b32_e32 v14, s4, v8 +; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX9-NEXT: v_and_b32_e32 v15, s4, v8 +; GFX9-NEXT: s_sub_i32 s5, 64, 1 +; GFX9-NEXT: s_sub_i32 s4, 1, 64 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s5, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3] +; GFX9-NEXT: v_lshlrev_b64 v[12:13], 1, v[0:1] +; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] +; GFX9-NEXT: s_and_b32 s4, 1, s6 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 64, v15 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, v[8:9] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[0:1] +; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15 +; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] +; GFX9-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v16, v[8:9] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14 +; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] +; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14 +; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; 
GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX9-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_sub_i32 s4, 64, 1 +; GFX10-NEXT: s_sub_i32 s6, 1, 64 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_lshrrev_b64 v[9:10], s4, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[11:12], 1, v[2:3] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[13:14], 1, v[0:1] +; GFX10-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] +; GFX10-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 +; GFX10-NEXT: v_or_b32_e32 v10, v10, v12 +; GFX10-NEXT: v_xor_b32_e32 v15, -1, v8 +; GFX10-NEXT: s_movk_i32 s5, 0x7f +; GFX10-NEXT: s_and_b32 s6, 1, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v11, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v10, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s6 +; GFX10-NEXT: v_and_b32_e32 v19, s5, v15 +; GFX10-NEXT: v_and_b32_e32 v20, s5, v8 +; GFX10-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v10, 0, v14, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v3, s4 +; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v19 +; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v20 +; GFX10-NEXT: v_mov_b32_e32 v25, v4 +; GFX10-NEXT: v_mov_b32_e32 v26, v5 +; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v11, v[9:10] +; GFX10-NEXT: v_lshlrev_b64 v[11:12], v19, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[13:14], v19, v[9:10] +; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v20 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, v[9:10] +; GFX10-NEXT: v_lshrrev_b64 v[15:16], v20, v[25:26] +; GFX10-NEXT: v_lshlrev_b64 v[17:18], v17, v[6:7] +; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v19 +; GFX10-NEXT: v_or_b32_e32 v10, v3, v12 +; GFX10-NEXT: v_or_b32_e32 v11, v2, v11 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v21, v[6:7] +; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v20 +; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v13, vcc_lo +; GFX10-NEXT: v_or_b32_e32 v13, v15, v17 +; GFX10-NEXT: v_cndmask_b32_e32 v15, v9, v10, vcc_lo +; GFX10-NEXT: v_or_b32_e32 v10, v16, v18 +; GFX10-NEXT: v_lshrrev_b64 v[6:7], v20, v[6:7] +; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v19 +; GFX10-NEXT: v_cndmask_b32_e32 v11, v8, v11, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v13, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v10, s4 +; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v20 +; GFX10-NEXT: v_cndmask_b32_e64 v9, v15, v1, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v0, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v10, 0, v14, vcc_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v25, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v26, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v7, s4 +; GFX10-NEXT: v_or_b32_e32 v0, v23, v0 +; GFX10-NEXT: v_or_b32_e32 v1, v10, v1 +; GFX10-NEXT: v_or_b32_e32 v2, v8, v2 +; GFX10-NEXT: v_or_b32_e32 v3, v9, v3 +; 
GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) + ret i128 %result +} + +define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { +; GFX6-LABEL: v_fshr_i128_ssv: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_sub_i32 s14, 1, 64 +; GFX6-NEXT: s_sub_i32 s10, 64, 1 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_movk_i32 s8, 0x7f +; GFX6-NEXT: s_cselect_b32 s15, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s16, 1, 0 +; GFX6-NEXT: v_and_b32_e32 v6, s8, v0 +; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX6-NEXT: v_and_b32_e32 v7, s8, v0 +; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 +; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s14 +; GFX6-NEXT: s_and_b32 s12, s15, 1 +; GFX6-NEXT: s_cmp_lg_u32 s12, 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX6-NEXT: s_and_b32 s10, s16, 1 +; GFX6-NEXT: s_cmp_lg_u32 s10, 0 +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: v_lshr_b64 v[0:1], s[8:9], v0 +; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7 +; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7 +; GFX6-NEXT: v_lshl_b64 v[4:5], s[8:9], v7 +; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX6-NEXT: v_lshl_b64 v[0:1], s[8:9], v8 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc +; GFX6-NEXT: v_mov_b32_e32 v2, s0 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX6-NEXT: v_mov_b32_e32 v3, s1 +; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6 +; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6 +; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2 +; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6 +; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 +; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 +; GFX6-NEXT: v_mov_b32_e32 v3, s5 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX6-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v9, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i128_ssv: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_sub_i32 s14, 1, 64 +; GFX8-NEXT: s_sub_i32 s10, 64, 1 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_movk_i32 s8, 0x7f +; GFX8-NEXT: s_cselect_b32 s15, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s16, 1, 0 +; GFX8-NEXT: v_and_b32_e32 v6, s8, v0 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX8-NEXT: v_and_b32_e32 v7, s8, v0 +; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 +; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX8-NEXT: 
s_lshl_b64 s[0:1], s[0:1], s14 +; GFX8-NEXT: s_and_b32 s12, s15, 1 +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX8-NEXT: s_and_b32 s10, s16, 1 +; GFX8-NEXT: s_cmp_lg_u32 s10, 0 +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[8:9] +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] +; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7 +; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[8:9] +; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[8:9] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] +; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v9, v1 +; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i128_ssv: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_i32 s14, 1, 64 +; GFX9-NEXT: s_sub_i32 s10, 64, 1 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_movk_i32 s8, 0x7f +; GFX9-NEXT: s_cselect_b32 s15, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: v_and_b32_e32 v6, s8, v0 +; GFX9-NEXT: s_cselect_b32 s16, 1, 0 +; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX9-NEXT: v_and_b32_e32 v7, s8, v0 +; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 +; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s14 +; GFX9-NEXT: s_and_b32 s12, s15, 1 +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX9-NEXT: s_and_b32 s10, s16, 1 +; GFX9-NEXT: s_cmp_lg_u32 s10, 0 +; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[8:9] +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] +; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7 +; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[8:9] +; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[8:9] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; 
GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] +; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6 +; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 +; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX9-NEXT: v_or_b32_e32 v1, v9, v1 +; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i128_ssv: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_sub_i32 s14, 1, 64 +; GFX10-NEXT: s_sub_i32 s8, 64, 1 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 +; GFX10-NEXT: s_cselect_b32 s15, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_movk_i32 s16, 0x7f +; GFX10-NEXT: s_cselect_b32 s17, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], 1 +; GFX10-NEXT: v_and_b32_e32 v12, s16, v1 +; GFX10-NEXT: s_and_b32 s15, s15, 1 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 +; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s14 +; GFX10-NEXT: s_cmp_lg_u32 s15, 0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, 64, v12 +; GFX10-NEXT: s_cselect_b64 s[10:11], s[12:13], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX10-NEXT: s_and_b32 s8, s17, 1 +; GFX10-NEXT: v_and_b32_e32 v13, s16, v0 +; GFX10-NEXT: s_cmp_lg_u32 s8, 0 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v1, s[10:11] +; GFX10-NEXT: s_cselect_b64 s[8:9], s[2:3], s[0:1] +; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12 +; GFX10-NEXT: v_lshlrev_b64 v[2:3], v12, s[8:9] +; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 +; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[4:5] +; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 +; GFX10-NEXT: v_lshlrev_b64 v[15:16], v10, s[10:11] +; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] +; GFX10-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 64, v13 +; GFX10-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[10:11] +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 +; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7] +; GFX10-NEXT: v_or_b32_e32 v7, v7, v9 +; GFX10-NEXT: v_cndmask_b32_e32 v8, v15, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v10, v16, v3, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] +; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v15, 0, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v19, v8, s8, s2 +; GFX10-NEXT: 
v_cndmask_b32_e64 v0, v0, s4, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, s9, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +; GFX10-NEXT: v_or_b32_e32 v0, v11, v0 +; GFX10-NEXT: v_or_b32_e32 v1, v15, v1 +; GFX10-NEXT: v_or_b32_e32 v2, v19, v2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_or_b32_e32 v3, v6, v3 +; GFX10-NEXT: ; return to shader part epilog + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) + %cast.result = bitcast i128 %result to <4 x float> + ret <4 x float> %cast.result +} + +define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { +; GFX6-LABEL: v_fshr_i128_svs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s6, 0x7f +; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX6-NEXT: s_sub_i32 s5, 1, 64 +; GFX6-NEXT: s_sub_i32 s9, 64, 1 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s14, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s15, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX6-NEXT: s_lshl_b64 s[6:7], s[0:1], 1 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 +; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX6-NEXT: s_and_b32 s5, s14, 1 +; GFX6-NEXT: s_cmp_lg_u32 s5, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX6-NEXT: s_and_b32 s5, s15, 1 +; GFX6-NEXT: s_cmp_lg_u32 s5, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_sub_i32 s9, s4, 64 +; GFX6-NEXT: s_sub_i32 s5, 64, s4 +; GFX6-NEXT: s_cmp_lt_u32 s4, 64 +; GFX6-NEXT: s_cselect_b32 s12, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s4, 0 +; GFX6-NEXT: s_cselect_b32 s13, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[6:7], s4 +; GFX6-NEXT: s_lshr_b64 s[10:11], s[6:7], s5 +; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 +; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s9 +; GFX6-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX6-NEXT: s_and_b32 s9, s12, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_and_b32 s6, s13, 1 +; GFX6-NEXT: s_cmp_lg_u32 s6, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; GFX6-NEXT: s_sub_i32 s5, 64, s8 +; GFX6-NEXT: s_sub_i32 s4, s8, 64 +; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_cselect_b32 s6, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 +; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s4 +; GFX6-NEXT: s_and_b32 s4, 1, s6 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 +; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s6 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 +; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 +; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX6-NEXT: ; return to shader part epilog 
+; +; GFX8-LABEL: v_fshr_i128_svs: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_movk_i32 s6, 0x7f +; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX8-NEXT: s_sub_i32 s5, 1, 64 +; GFX8-NEXT: s_sub_i32 s9, 64, 1 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s14, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s15, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX8-NEXT: s_lshl_b64 s[6:7], s[0:1], 1 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 +; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX8-NEXT: s_and_b32 s5, s14, 1 +; GFX8-NEXT: s_cmp_lg_u32 s5, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX8-NEXT: s_and_b32 s5, s15, 1 +; GFX8-NEXT: s_cmp_lg_u32 s5, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s9, s4, 64 +; GFX8-NEXT: s_sub_i32 s5, 64, s4 +; GFX8-NEXT: s_cmp_lt_u32 s4, 64 +; GFX8-NEXT: s_cselect_b32 s12, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s4, 0 +; GFX8-NEXT: s_cselect_b32 s13, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s4 +; GFX8-NEXT: s_lshr_b64 s[10:11], s[6:7], s5 +; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 +; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], s9 +; GFX8-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX8-NEXT: s_and_b32 s9, s12, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX8-NEXT: s_and_b32 s6, s13, 1 +; GFX8-NEXT: s_cmp_lg_u32 s6, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; GFX8-NEXT: s_sub_i32 s5, 64, s8 +; GFX8-NEXT: s_sub_i32 s4, s8, 64 +; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] +; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] +; GFX8-NEXT: s_and_b32 s4, 1, s6 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 +; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s6 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 +; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 +; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 +; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i128_svs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_movk_i32 s6, 0x7f +; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_sub_i32 s5, 1, 64 +; GFX9-NEXT: s_sub_i32 s9, 64, 1 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s14, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s15, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[2:3], 1 +; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], 1 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 +; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] +; GFX9-NEXT: s_and_b32 s5, s14, 1 +; GFX9-NEXT: 
s_cmp_lg_u32 s5, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] +; GFX9-NEXT: s_and_b32 s5, s15, 1 +; GFX9-NEXT: s_cmp_lg_u32 s5, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s9, s4, 64 +; GFX9-NEXT: s_sub_i32 s5, 64, s4 +; GFX9-NEXT: s_cmp_lt_u32 s4, 64 +; GFX9-NEXT: s_cselect_b32 s12, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-NEXT: s_cselect_b32 s13, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[6:7], s4 +; GFX9-NEXT: s_lshr_b64 s[10:11], s[6:7], s5 +; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 +; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], s9 +; GFX9-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX9-NEXT: s_and_b32 s9, s12, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX9-NEXT: s_and_b32 s6, s13, 1 +; GFX9-NEXT: s_cmp_lg_u32 s6, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; GFX9-NEXT: s_sub_i32 s5, 64, s8 +; GFX9-NEXT: s_sub_i32 s4, s8, 64 +; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] +; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] +; GFX9-NEXT: s_and_b32 s4, 1, s6 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 +; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s6 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX9-NEXT: v_or_b32_e32 v0, s2, v0 +; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 +; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 +; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: v_fshr_i128_svs: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_movk_i32 s6, 0x7f +; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX10-NEXT: s_sub_i32 s5, 1, 64 +; GFX10-NEXT: s_sub_i32 s6, 64, 1 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] +; GFX10-NEXT: s_cselect_b32 s9, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_cselect_b32 s14, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], 1 +; GFX10-NEXT: s_and_b32 s9, s9, 1 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[10:11], s[12:13], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] +; GFX10-NEXT: s_and_b32 s5, s14, 1 +; GFX10-NEXT: s_cmp_lg_u32 s5, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX10-NEXT: s_sub_i32 s9, s4, 64 +; GFX10-NEXT: s_sub_i32 s2, 64, s4 +; GFX10-NEXT: s_cmp_lt_u32 s4, 64 +; GFX10-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-NEXT: s_cselect_b32 s13, 1, 0 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], s4 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[10:11], s2 +; GFX10-NEXT: s_and_b32 s12, s12, 1 +; GFX10-NEXT: 
s_or_b64 s[2:3], s[2:3], s[6:7] +; GFX10-NEXT: s_lshl_b64 s[4:5], s[10:11], s4 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 +; GFX10-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[6:7] +; GFX10-NEXT: s_and_b32 s6, s13, 1 +; GFX10-NEXT: s_cmp_lg_u32 s6, 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[0:1], s[2:3] +; GFX10-NEXT: s_sub_i32 s0, 64, s8 +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] +; GFX10-NEXT: s_sub_i32 s0, s8, 64 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: v_lshrrev_b64 v[11:12], s0, v[2:3] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 +; GFX10-NEXT: s_cselect_b32 s0, 1, 0 +; GFX10-NEXT: s_and_b32 s1, 1, vcc_lo +; GFX10-NEXT: s_and_b32 s0, 1, s0 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v7, v11, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v11, v12, v5, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 +; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX10-NEXT: ; return to shader part epilog + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) + %cast.result = bitcast i128 %result to <4 x float> + ret <4 x float> %cast.result +} + +define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { +; GFX6-LABEL: v_fshr_i128_vss: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s6, 0x7f +; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX6-NEXT: s_sub_i32 s6, 64, 1 +; GFX6-NEXT: s_sub_i32 s5, 1, 64 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], 1 +; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s5 +; GFX6-NEXT: s_and_b32 s5, 1, s7 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 +; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 +; GFX6-NEXT: s_and_b32 s5, 1, s9 +; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_sub_i32 s5, s4, 64 +; GFX6-NEXT: s_sub_i32 s6, 64, s4 +; GFX6-NEXT: s_cmp_lt_u32 s4, 64 +; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s4, 0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[4:5], s6 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], s4 +; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4 +; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s9 +; GFX6-NEXT: s_sub_i32 s10, s8, 64 +; GFX6-NEXT: s_sub_i32 s9, 64, s8 +; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: v_or_b32_e32 v6, v2, v6 +; GFX6-NEXT: v_or_b32_e32 v7, v3, v7 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[4:5], s5 +; GFX6-NEXT: 
s_cselect_b32 s11, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cselect_b32 s12, 1, 0 +; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 +; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 +; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 +; GFX6-NEXT: s_and_b32 s8, s11, 1 +; GFX6-NEXT: s_cmp_lg_u32 s8, 0 +; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] +; GFX6-NEXT: s_and_b32 s6, s12, 1 +; GFX6-NEXT: s_cmp_lg_u32 s6, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: s_cmp_lg_u32 s8, 0 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc +; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v4 +; GFX6-NEXT: v_or_b32_e32 v1, s1, v5 +; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: v_fshr_i128_vss: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_movk_i32 s6, 0x7f +; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX8-NEXT: s_sub_i32 s6, 64, 1 +; GFX8-NEXT: s_sub_i32 s5, 1, 64 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[2:3] +; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] +; GFX8-NEXT: s_and_b32 s5, 1, s7 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 +; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 +; GFX8-NEXT: s_and_b32 s5, 1, s9 +; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_sub_i32 s5, s4, 64 +; GFX8-NEXT: s_sub_i32 s6, 64, s4 +; GFX8-NEXT: s_cmp_lt_u32 s4, 64 +; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s4, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], s6, v[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] +; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s9 +; GFX8-NEXT: s_sub_i32 s10, s8, 64 +; GFX8-NEXT: s_sub_i32 s9, 64, s8 +; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: v_or_b32_e32 v6, v2, v6 +; GFX8-NEXT: v_or_b32_e32 v7, v3, v7 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], s5, v[4:5] +; GFX8-NEXT: s_cselect_b32 s11, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cselect_b32 s12, 1, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 +; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 +; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 +; GFX8-NEXT: 
s_and_b32 s8, s11, 1 +; GFX8-NEXT: s_cmp_lg_u32 s8, 0 +; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] +; GFX8-NEXT: s_and_b32 s6, s12, 1 +; GFX8-NEXT: s_cmp_lg_u32 s6, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_cmp_lg_u32 s8, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc +; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 +; GFX8-NEXT: v_or_b32_e32 v0, s0, v4 +; GFX8-NEXT: v_or_b32_e32 v1, s1, v5 +; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_fshr_i128_vss: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_movk_i32 s6, 0x7f +; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_sub_i32 s6, 64, 1 +; GFX9-NEXT: s_sub_i32 s5, 1, 64 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[2:3] +; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] +; GFX9-NEXT: s_and_b32 s5, 1, s7 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 +; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 +; GFX9-NEXT: s_and_b32 s5, 1, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_sub_i32 s5, s4, 64 +; GFX9-NEXT: s_sub_i32 s6, 64, s4 +; GFX9-NEXT: s_cmp_lt_u32 s4, 64 +; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], s6, v[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] +; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s9 +; GFX9-NEXT: s_sub_i32 s10, s8, 64 +; GFX9-NEXT: s_sub_i32 s9, 64, s8 +; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: v_or_b32_e32 v6, v2, v6 +; GFX9-NEXT: v_or_b32_e32 v7, v3, v7 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], s5, v[4:5] +; GFX9-NEXT: s_cselect_b32 s11, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cselect_b32 s12, 1, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 +; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 +; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 +; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 +; GFX9-NEXT: s_and_b32 s8, s11, 1 +; GFX9-NEXT: s_cmp_lg_u32 s8, 0 +; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] +; GFX9-NEXT: s_and_b32 s6, s12, 1 +; GFX9-NEXT: s_cmp_lg_u32 s6, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: s_cmp_lg_u32 s8, 0 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc +; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v4 +; GFX9-NEXT: v_or_b32_e32 v1, s1, v5 +; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX9-NEXT: ; 
return to shader part epilog +; +; GFX10-LABEL: v_fshr_i128_vss: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_movk_i32 s6, 0x7f +; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[2:3] +; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] +; GFX10-NEXT: s_andn2_b64 s[6:7], s[6:7], s[4:5] +; GFX10-NEXT: s_sub_i32 s4, 64, 1 +; GFX10-NEXT: s_sub_i32 s5, 1, 64 +; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_lshlrev_b64 v[13:14], s5, v[0:1] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] +; GFX10-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: v_or_b32_e32 v6, v4, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 +; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 +; GFX10-NEXT: s_and_b32 s5, 1, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, v13, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v14, v5, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v3, s4 +; GFX10-NEXT: s_sub_i32 s4, 64, s6 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], s6, v[4:5] +; GFX10-NEXT: v_lshrrev_b64 v[11:12], s4, v[4:5] +; GFX10-NEXT: s_sub_i32 s4, s6, 64 +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s6, v[0:1] +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 +; GFX10-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-NEXT: v_or_b32_e32 v6, v11, v6 +; GFX10-NEXT: v_or_b32_e32 v7, v12, v7 +; GFX10-NEXT: v_lshlrev_b64 v[11:12], s4, v[4:5] +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 +; GFX10-NEXT: s_and_b32 s4, 1, s5 +; GFX10-NEXT: s_sub_i32 s10, s8, 64 +; GFX10-NEXT: s_sub_i32 s6, 64, s8 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v11, v6, vcc_lo +; GFX10-NEXT: s_cselect_b32 s11, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v12, v7, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 +; GFX10-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 +; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; GFX10-NEXT: s_and_b32 s6, s11, 1 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 +; GFX10-NEXT: s_cmp_lg_u32 s6, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo +; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] +; GFX10-NEXT: s_and_b32 s4, s12, 1 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo +; GFX10-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: s_cmp_lg_u32 s6, 0 +; GFX10-NEXT: v_or_b32_e32 v0, s0, v4 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 +; GFX10-NEXT: v_or_b32_e32 v1, s1, v5 +; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX10-NEXT: ; return to shader part epilog + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) + %cast.result = bitcast i128 %result to <4 x float> + ret <4 x float> %cast.result +} + +define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) { +; GFX6-LABEL: s_fshr_i128_65: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s9, 0 +; GFX6-NEXT: s_movk_i32 s8, 0x41 +; GFX6-NEXT: s_movk_i32 
s10, 0x7f +; GFX6-NEXT: s_mov_b32 s11, s9 +; GFX6-NEXT: s_andn2_b64 s[10:11], s[10:11], s[8:9] +; GFX6-NEXT: s_sub_i32 s9, 1, 64 +; GFX6-NEXT: s_sub_i32 s11, 64, 1 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s18, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s19, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[14:15], s[0:1], s11 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX6-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX6-NEXT: s_and_b32 s9, s18, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX6-NEXT: s_and_b32 s9, s19, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_sub_i32 s9, s10, 64 +; GFX6-NEXT: s_sub_i32 s11, 64, s10 +; GFX6-NEXT: s_cmp_lt_u32 s10, 64 +; GFX6-NEXT: s_cselect_b32 s16, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s10, 0 +; GFX6-NEXT: s_cselect_b32 s17, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[12:13], s10 +; GFX6-NEXT: s_lshr_b64 s[14:15], s[12:13], s11 +; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[12:13], s9 +; GFX6-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11] +; GFX6-NEXT: s_and_b32 s9, s16, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[12:13] +; GFX6-NEXT: s_and_b32 s9, s17, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], s[10:11] +; GFX6-NEXT: s_sub_i32 s14, s8, 64 +; GFX6-NEXT: s_sub_i32 s12, 64, s8 +; GFX6-NEXT: s_cmp_lt_u32 s8, 64 +; GFX6-NEXT: s_cselect_b32 s15, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cselect_b32 s16, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 +; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 +; GFX6-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 +; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX6-NEXT: s_and_b32 s12, s15, 1 +; GFX6-NEXT: s_cmp_lg_u32 s12, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] +; GFX6-NEXT: s_and_b32 s8, s16, 1 +; GFX6-NEXT: s_cmp_lg_u32 s8, 0 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_cmp_lg_u32 s12, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX6-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_i128_65: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s9, 0 +; GFX8-NEXT: s_movk_i32 s8, 0x41 +; GFX8-NEXT: s_movk_i32 s10, 0x7f +; GFX8-NEXT: s_mov_b32 s11, s9 +; GFX8-NEXT: s_andn2_b64 s[10:11], s[10:11], s[8:9] +; GFX8-NEXT: s_sub_i32 s9, 1, 64 +; GFX8-NEXT: s_sub_i32 s11, 64, 1 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s18, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s19, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[14:15], s[0:1], s11 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX8-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX8-NEXT: s_and_b32 s9, s18, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX8-NEXT: s_and_b32 s9, s19, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s9, s10, 64 +; GFX8-NEXT: s_sub_i32 s11, 64, s10 +; GFX8-NEXT: s_cmp_lt_u32 
s10, 64 +; GFX8-NEXT: s_cselect_b32 s16, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s10, 0 +; GFX8-NEXT: s_cselect_b32 s17, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[12:13], s10 +; GFX8-NEXT: s_lshr_b64 s[14:15], s[12:13], s11 +; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[12:13], s9 +; GFX8-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11] +; GFX8-NEXT: s_and_b32 s9, s16, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[12:13] +; GFX8-NEXT: s_and_b32 s9, s17, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], s[10:11] +; GFX8-NEXT: s_sub_i32 s14, s8, 64 +; GFX8-NEXT: s_sub_i32 s12, 64, s8 +; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_cselect_b32 s15, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cselect_b32 s16, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 +; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 +; GFX8-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 +; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX8-NEXT: s_and_b32 s12, s15, 1 +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] +; GFX8-NEXT: s_and_b32 s8, s16, 1 +; GFX8-NEXT: s_cmp_lg_u32 s8, 0 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX8-NEXT: s_cmp_lg_u32 s12, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX8-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_i128_65: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s9, 0 +; GFX9-NEXT: s_movk_i32 s8, 0x41 +; GFX9-NEXT: s_movk_i32 s10, 0x7f +; GFX9-NEXT: s_mov_b32 s11, s9 +; GFX9-NEXT: s_andn2_b64 s[10:11], s[10:11], s[8:9] +; GFX9-NEXT: s_sub_i32 s9, 1, 64 +; GFX9-NEXT: s_sub_i32 s11, 64, 1 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s18, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s19, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[14:15], s[0:1], s11 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[2:3], 1 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[0:1], 1 +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX9-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GFX9-NEXT: s_and_b32 s9, s18, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] +; GFX9-NEXT: s_and_b32 s9, s19, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s9, s10, 64 +; GFX9-NEXT: s_sub_i32 s11, 64, s10 +; GFX9-NEXT: s_cmp_lt_u32 s10, 64 +; GFX9-NEXT: s_cselect_b32 s16, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s10, 0 +; GFX9-NEXT: s_cselect_b32 s17, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[2:3], s[12:13], s10 +; GFX9-NEXT: s_lshr_b64 s[14:15], s[12:13], s11 +; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 +; GFX9-NEXT: s_lshl_b64 s[12:13], s[12:13], s9 +; GFX9-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11] +; GFX9-NEXT: s_and_b32 s9, s16, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[12:13] +; GFX9-NEXT: s_and_b32 s9, s17, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], s[10:11] +; GFX9-NEXT: s_sub_i32 s14, s8, 64 +; GFX9-NEXT: s_sub_i32 s12, 64, s8 +; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_cselect_b32 s15, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cselect_b32 s16, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 +; GFX9-NEXT: 
s_lshl_b64 s[12:13], s[6:7], s12 +; GFX9-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 +; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX9-NEXT: s_and_b32 s12, s15, 1 +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] +; GFX9-NEXT: s_and_b32 s8, s16, 1 +; GFX9-NEXT: s_cmp_lg_u32 s8, 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX9-NEXT: s_cmp_lg_u32 s12, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX9-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_i128_65: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 s9, 0 +; GFX10-NEXT: s_movk_i32 s8, 0x41 +; GFX10-NEXT: s_movk_i32 s10, 0x7f +; GFX10-NEXT: s_mov_b32 s11, s9 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_andn2_b64 s[10:11], s[10:11], s[8:9] +; GFX10-NEXT: s_sub_i32 s9, 1, 64 +; GFX10-NEXT: s_sub_i32 s11, 64, 1 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: s_cselect_b32 s18, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_cselect_b32 s19, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[12:13], s[0:1], s11 +; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 1 +; GFX10-NEXT: s_and_b32 s11, s18, 1 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], 1 +; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 +; GFX10-NEXT: s_cmp_lg_u32 s11, 0 +; GFX10-NEXT: s_cselect_b64 s[14:15], s[16:17], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] +; GFX10-NEXT: s_and_b32 s9, s19, 1 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX10-NEXT: s_sub_i32 s9, s10, 64 +; GFX10-NEXT: s_sub_i32 s2, 64, s10 +; GFX10-NEXT: s_cmp_lt_u32 s10, 64 +; GFX10-NEXT: s_cselect_b32 s16, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 0 +; GFX10-NEXT: s_cselect_b32 s17, 1, 0 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[0:1], s10 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[14:15], s2 +; GFX10-NEXT: s_and_b32 s16, s16, 1 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13] +; GFX10-NEXT: s_lshl_b64 s[10:11], s[14:15], s10 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[14:15], s9 +; GFX10-NEXT: s_cmp_lg_u32 s16, 0 +; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[12:13] +; GFX10-NEXT: s_and_b32 s9, s17, 1 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[0:1], s[2:3] +; GFX10-NEXT: s_sub_i32 s14, s8, 64 +; GFX10-NEXT: s_sub_i32 s9, 64, s8 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_cselect_b32 s15, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cselect_b32 s16, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s8 +; GFX10-NEXT: s_lshl_b64 s[12:13], s[6:7], s9 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13] +; GFX10-NEXT: s_and_b32 s12, s15, 1 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 +; GFX10-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] +; GFX10-NEXT: s_and_b32 s6, s16, 1 +; GFX10-NEXT: s_cmp_lg_u32 s6, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] +; GFX10-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], 0 +; GFX10-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1] +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX10-NEXT: ; return to shader part epilog + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) + ret i128 %result +} + +define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { +; GFX6-LABEL: v_fshr_i128_65: +; GFX6: ; %bb.0: +; 
GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s5, 0 +; GFX6-NEXT: s_movk_i32 s4, 0x41 +; GFX6-NEXT: s_movk_i32 s6, 0x7f +; GFX6-NEXT: s_mov_b32 s7, s5 +; GFX6-NEXT: s_andn2_b64 s[6:7], s[6:7], s[4:5] +; GFX6-NEXT: s_sub_i32 s7, 64, 1 +; GFX6-NEXT: s_sub_i32 s5, 1, 64 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s8, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], s7 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], 1 +; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: v_lshl_b64 v[12:13], v[0:1], 1 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s5 +; GFX6-NEXT: s_and_b32 s5, 1, s8 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s9 +; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: s_sub_i32 s7, 64, s6 +; GFX6-NEXT: s_sub_i32 s5, s6, 64 +; GFX6-NEXT: s_cmp_lt_u32 s6, 64 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[8:9], s7 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[0:1], s6 +; GFX6-NEXT: s_cselect_b32 s8, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s6, 0 +; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], s6 +; GFX6-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[8:9], s5 +; GFX6-NEXT: s_and_b32 s5, 1, s8 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s5, 1, s9 +; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_sub_i32 s5, s4, 64 +; GFX6-NEXT: s_sub_i32 s6, 64, s4 +; GFX6-NEXT: s_cmp_lt_u32 s4, 64 +; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s4 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], s6 +; GFX6-NEXT: s_cselect_b32 s7, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s4, 0 +; GFX6-NEXT: s_cselect_b32 s8, 1, 0 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], s4 +; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s8 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX6-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX6-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_i128_65: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s5, 0 +; GFX8-NEXT: s_movk_i32 s4, 0x41 +; GFX8-NEXT: s_movk_i32 s6, 0x7f +; GFX8-NEXT: s_mov_b32 s7, s5 +; GFX8-NEXT: s_andn2_b64 s[6:7], s[6:7], s[4:5] +; GFX8-NEXT: s_sub_i32 s7, 64, 1 +; GFX8-NEXT: 
s_sub_i32 s5, 1, 64 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s8, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s7, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3] +; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: v_lshlrev_b64 v[12:13], 1, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] +; GFX8-NEXT: s_and_b32 s5, 1, s8 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s9 +; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: s_sub_i32 s7, 64, s6 +; GFX8-NEXT: s_sub_i32 s5, s6, 64 +; GFX8-NEXT: s_cmp_lt_u32 s6, 64 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], s7, v[8:9] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], s6, v[0:1] +; GFX8-NEXT: s_cselect_b32 s8, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s6, 0 +; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: v_lshlrev_b64 v[12:13], s6, v[8:9] +; GFX8-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], s5, v[8:9] +; GFX8-NEXT: s_and_b32 s5, 1, s8 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s5, 1, s9 +; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_sub_i32 s5, s4, 64 +; GFX8-NEXT: s_sub_i32 s6, 64, s4 +; GFX8-NEXT: s_cmp_lt_u32 s4, 64 +; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[2:3], s6, v[6:7] +; GFX8-NEXT: s_cselect_b32 s7, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s4, 0 +; GFX8-NEXT: s_cselect_b32 s8, 1, 0 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s4, v[6:7] +; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX8-NEXT: v_lshrrev_b64 v[0:1], s5, v[6:7] +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s8 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX8-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_i128_65: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s5, 0 +; GFX9-NEXT: s_movk_i32 s4, 0x41 +; GFX9-NEXT: s_movk_i32 s6, 0x7f +; GFX9-NEXT: s_mov_b32 s7, s5 +; GFX9-NEXT: s_andn2_b64 s[6:7], s[6:7], s[4:5] +; GFX9-NEXT: s_sub_i32 s7, 64, 1 +; GFX9-NEXT: s_sub_i32 s5, 1, 64 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s7, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3] +; GFX9-NEXT: 
s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: v_lshlrev_b64 v[12:13], 1, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] +; GFX9-NEXT: s_and_b32 s5, 1, s8 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_sub_i32 s7, 64, s6 +; GFX9-NEXT: s_sub_i32 s5, s6, 64 +; GFX9-NEXT: s_cmp_lt_u32 s6, 64 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], s7, v[8:9] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], s6, v[0:1] +; GFX9-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s6, 0 +; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: v_lshlrev_b64 v[12:13], s6, v[8:9] +; GFX9-NEXT: v_or_b32_e32 v10, v2, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v3, v11 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], s5, v[8:9] +; GFX9-NEXT: s_and_b32 s5, 1, s8 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s5, 1, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_sub_i32 s5, s4, 64 +; GFX9-NEXT: s_sub_i32 s6, 64, s4 +; GFX9-NEXT: s_cmp_lt_u32 s4, 64 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v1, vcc +; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[2:3], s6, v[6:7] +; GFX9-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[6:7] +; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX9-NEXT: v_lshrrev_b64 v[0:1], s5, v[6:7] +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s8 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX9-NEXT: v_or_b32_e32 v0, v12, v0 +; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 +; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX9-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_i128_65: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_movk_i32 s6, 0x41 +; GFX10-NEXT: s_movk_i32 s4, 0x7f +; GFX10-NEXT: s_mov_b32 s5, s7 +; GFX10-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3] +; GFX10-NEXT: s_andn2_b64 s[8:9], s[4:5], s[6:7] +; GFX10-NEXT: s_sub_i32 s4, 64, 1 +; GFX10-NEXT: s_sub_i32 s5, 1, 64 +; GFX10-NEXT: v_lshrrev_b64 v[8:9], s4, v[0:1] +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_lshlrev_b64 v[17:18], s5, v[0:1] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[12:13], 1, v[0:1] +; GFX10-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: 
v_or_b32_e32 v10, v8, v10 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 +; GFX10-NEXT: v_or_b32_e32 v9, v9, v11 +; GFX10-NEXT: s_and_b32 s5, 1, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v8, 0, v12, vcc_lo +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, v17, v10, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v18, v9, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v9, 0, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v3, s4 +; GFX10-NEXT: s_sub_i32 s4, 64, s8 +; GFX10-NEXT: v_lshlrev_b64 v[12:13], s8, v[8:9] +; GFX10-NEXT: v_lshrrev_b64 v[15:16], s4, v[8:9] +; GFX10-NEXT: s_sub_i32 s4, s8, 64 +; GFX10-NEXT: v_lshlrev_b64 v[10:11], s8, v[0:1] +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: v_cndmask_b32_e32 v19, 0, v12, vcc_lo +; GFX10-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-NEXT: v_or_b32_e32 v10, v15, v10 +; GFX10-NEXT: v_or_b32_e32 v14, v16, v11 +; GFX10-NEXT: v_lshlrev_b64 v[15:16], s4, v[8:9] +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v13, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 +; GFX10-NEXT: s_sub_i32 s4, 64, s6 +; GFX10-NEXT: v_lshrrev_b64 v[8:9], s6, v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v10, vcc_lo +; GFX10-NEXT: v_lshlrev_b64 v[10:11], s4, v[6:7] +; GFX10-NEXT: s_and_b32 s4, 1, s5 +; GFX10-NEXT: s_sub_i32 s5, s6, 64 +; GFX10-NEXT: s_cmp_lt_u32 s6, 64 +; GFX10-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[2:3], s5, v[6:7] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s6, 0 +; GFX10-NEXT: v_or_b32_e32 v8, v8, v10 +; GFX10-NEXT: v_or_b32_e32 v9, v9, v11 +; GFX10-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-NEXT: v_lshrrev_b64 v[6:7], s6, v[6:7] +; GFX10-NEXT: s_and_b32 s5, 1, s5 +; GFX10-NEXT: s_and_b32 s6, 1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, s5 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v15, v0, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, v4, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v9, v14, v1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, v5, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc_lo +; GFX10-NEXT: v_or_b32_e32 v0, v19, v0 +; GFX10-NEXT: v_or_b32_e32 v2, v8, v2 +; GFX10-NEXT: v_or_b32_e32 v1, v23, v1 +; GFX10-NEXT: v_or_b32_e32 v3, v9, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) + ret i128 %result +} + +define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { +; GFX6-LABEL: s_fshr_v2i128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_movk_i32 s18, 0x7f +; GFX6-NEXT: s_mov_b32 s19, 0 +; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] +; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] +; GFX6-NEXT: s_sub_i32 s30, 1, 64 +; GFX6-NEXT: s_sub_i32 s31, 64, 1 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s17, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s23, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[26:27], s[0:1], s31 +; GFX6-NEXT: s_lshl_b64 s[28:29], s[2:3], 1 +; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX6-NEXT: s_or_b64 s[26:27], s[26:27], s[28:29] +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s30 +; GFX6-NEXT: 
s_and_b32 s17, s17, 1 +; GFX6-NEXT: s_cmp_lg_u32 s17, 0 +; GFX6-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[26:27], s[0:1] +; GFX6-NEXT: s_and_b32 s17, s23, 1 +; GFX6-NEXT: s_cmp_lg_u32 s17, 0 +; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_sub_i32 s23, s16, 64 +; GFX6-NEXT: s_sub_i32 s17, 64, s16 +; GFX6-NEXT: s_cmp_lt_u32 s16, 64 +; GFX6-NEXT: s_cselect_b32 s28, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s16, 0 +; GFX6-NEXT: s_cselect_b32 s29, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 +; GFX6-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 +; GFX6-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 +; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] +; GFX6-NEXT: s_and_b32 s23, s28, 1 +; GFX6-NEXT: s_cmp_lg_u32 s23, 0 +; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] +; GFX6-NEXT: s_and_b32 s23, s29, 1 +; GFX6-NEXT: s_cmp_lg_u32 s23, 0 +; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] +; GFX6-NEXT: s_sub_i32 s26, s22, 64 +; GFX6-NEXT: s_sub_i32 s24, 64, s22 +; GFX6-NEXT: s_cmp_lt_u32 s22, 64 +; GFX6-NEXT: s_cselect_b32 s27, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s22, 0 +; GFX6-NEXT: s_cselect_b32 s28, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 +; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 +; GFX6-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 +; GFX6-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] +; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX6-NEXT: s_and_b32 s24, s27, 1 +; GFX6-NEXT: s_cmp_lg_u32 s24, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] +; GFX6-NEXT: s_and_b32 s22, s28, 1 +; GFX6-NEXT: s_cmp_lg_u32 s22, 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX6-NEXT: s_cmp_lg_u32 s24, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] +; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] +; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s11, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[18:19], s[4:5], s31 +; GFX6-NEXT: s_lshl_b64 s[20:21], s[6:7], 1 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 +; GFX6-NEXT: s_or_b64 s[18:19], s[18:19], s[20:21] +; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s30 +; GFX6-NEXT: s_and_b32 s9, s9, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[18:19], s[4:5] +; GFX6-NEXT: s_and_b32 s9, s11, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX6-NEXT: s_sub_i32 s9, s10, 64 +; GFX6-NEXT: s_sub_i32 s11, 64, s10 +; GFX6-NEXT: s_cmp_lt_u32 s10, 64 +; GFX6-NEXT: s_cselect_b32 s20, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s10, 0 +; GFX6-NEXT: s_cselect_b32 s21, 1, 0 +; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 +; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 +; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 +; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] +; GFX6-NEXT: s_and_b32 s9, s20, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] +; GFX6-NEXT: s_and_b32 s9, s21, 1 +; GFX6-NEXT: s_cmp_lg_u32 s9, 0 +; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] +; GFX6-NEXT: s_sub_i32 s18, s8, 64 +; GFX6-NEXT: s_sub_i32 s16, 64, s8 +; GFX6-NEXT: s_cmp_lt_u32 
s8, 64 +; GFX6-NEXT: s_cselect_b32 s19, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 s8, 0 +; GFX6-NEXT: s_cselect_b32 s20, 1, 0 +; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 +; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 +; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 +; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] +; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 +; GFX6-NEXT: s_and_b32 s16, s19, 1 +; GFX6-NEXT: s_cmp_lg_u32 s16, 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] +; GFX6-NEXT: s_and_b32 s14, s20, 1 +; GFX6-NEXT: s_cmp_lg_u32 s14, 0 +; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] +; GFX6-NEXT: s_cmp_lg_u32 s16, 0 +; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 +; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] +; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_fshr_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_movk_i32 s18, 0x7f +; GFX8-NEXT: s_mov_b32 s19, 0 +; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] +; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] +; GFX8-NEXT: s_sub_i32 s30, 1, 64 +; GFX8-NEXT: s_sub_i32 s31, 64, 1 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s17, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s23, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[26:27], s[0:1], s31 +; GFX8-NEXT: s_lshl_b64 s[28:29], s[2:3], 1 +; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX8-NEXT: s_or_b64 s[26:27], s[26:27], s[28:29] +; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s30 +; GFX8-NEXT: s_and_b32 s17, s17, 1 +; GFX8-NEXT: s_cmp_lg_u32 s17, 0 +; GFX8-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[26:27], s[0:1] +; GFX8-NEXT: s_and_b32 s17, s23, 1 +; GFX8-NEXT: s_cmp_lg_u32 s17, 0 +; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_sub_i32 s23, s16, 64 +; GFX8-NEXT: s_sub_i32 s17, 64, s16 +; GFX8-NEXT: s_cmp_lt_u32 s16, 64 +; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s16, 0 +; GFX8-NEXT: s_cselect_b32 s29, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 +; GFX8-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 +; GFX8-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 +; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] +; GFX8-NEXT: s_and_b32 s23, s28, 1 +; GFX8-NEXT: s_cmp_lg_u32 s23, 0 +; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] +; GFX8-NEXT: s_and_b32 s23, s29, 1 +; GFX8-NEXT: s_cmp_lg_u32 s23, 0 +; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] +; GFX8-NEXT: s_sub_i32 s26, s22, 64 +; GFX8-NEXT: s_sub_i32 s24, 64, s22 +; GFX8-NEXT: s_cmp_lt_u32 s22, 64 +; GFX8-NEXT: s_cselect_b32 s27, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s22, 0 +; GFX8-NEXT: s_cselect_b32 s28, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 +; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 +; GFX8-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 +; GFX8-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] +; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX8-NEXT: s_and_b32 s24, s27, 1 +; GFX8-NEXT: s_cmp_lg_u32 s24, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] +; GFX8-NEXT: s_and_b32 s22, s28, 1 +; GFX8-NEXT: s_cmp_lg_u32 s22, 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX8-NEXT: s_cmp_lg_u32 s24, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 +; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] +; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] +; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; 
GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s11, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[18:19], s[4:5], s31 +; GFX8-NEXT: s_lshl_b64 s[20:21], s[6:7], 1 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 +; GFX8-NEXT: s_or_b64 s[18:19], s[18:19], s[20:21] +; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s30 +; GFX8-NEXT: s_and_b32 s9, s9, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[18:19], s[4:5] +; GFX8-NEXT: s_and_b32 s9, s11, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX8-NEXT: s_sub_i32 s9, s10, 64 +; GFX8-NEXT: s_sub_i32 s11, 64, s10 +; GFX8-NEXT: s_cmp_lt_u32 s10, 64 +; GFX8-NEXT: s_cselect_b32 s20, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s10, 0 +; GFX8-NEXT: s_cselect_b32 s21, 1, 0 +; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 +; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 +; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 +; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] +; GFX8-NEXT: s_and_b32 s9, s20, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] +; GFX8-NEXT: s_and_b32 s9, s21, 1 +; GFX8-NEXT: s_cmp_lg_u32 s9, 0 +; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] +; GFX8-NEXT: s_sub_i32 s18, s8, 64 +; GFX8-NEXT: s_sub_i32 s16, 64, s8 +; GFX8-NEXT: s_cmp_lt_u32 s8, 64 +; GFX8-NEXT: s_cselect_b32 s19, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 s8, 0 +; GFX8-NEXT: s_cselect_b32 s20, 1, 0 +; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 +; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 +; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 +; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] +; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 +; GFX8-NEXT: s_and_b32 s16, s19, 1 +; GFX8-NEXT: s_cmp_lg_u32 s16, 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] +; GFX8-NEXT: s_and_b32 s14, s20, 1 +; GFX8-NEXT: s_cmp_lg_u32 s14, 0 +; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] +; GFX8-NEXT: s_cmp_lg_u32 s16, 0 +; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 +; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] +; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_fshr_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_movk_i32 s18, 0x7f +; GFX9-NEXT: s_mov_b32 s19, 0 +; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] +; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] +; GFX9-NEXT: s_sub_i32 s30, 1, 64 +; GFX9-NEXT: s_sub_i32 s31, 64, 1 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s17, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s23, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[26:27], s[0:1], s31 +; GFX9-NEXT: s_lshl_b64 s[28:29], s[2:3], 1 +; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 +; GFX9-NEXT: s_or_b64 s[26:27], s[26:27], s[28:29] +; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s30 +; GFX9-NEXT: s_and_b32 s17, s17, 1 +; GFX9-NEXT: s_cmp_lg_u32 s17, 0 +; GFX9-NEXT: s_cselect_b64 s[24:25], s[24:25], 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[26:27], s[0:1] +; GFX9-NEXT: s_and_b32 s17, s23, 1 +; GFX9-NEXT: s_cmp_lg_u32 s17, 0 +; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_sub_i32 s23, s16, 64 +; GFX9-NEXT: s_sub_i32 s17, 64, s16 +; GFX9-NEXT: s_cmp_lt_u32 s16, 64 +; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s16, 0 +; GFX9-NEXT: s_cselect_b32 s29, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[2:3], 
s[24:25], s16 +; GFX9-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 +; GFX9-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 +; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] +; GFX9-NEXT: s_and_b32 s23, s28, 1 +; GFX9-NEXT: s_cmp_lg_u32 s23, 0 +; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] +; GFX9-NEXT: s_and_b32 s23, s29, 1 +; GFX9-NEXT: s_cmp_lg_u32 s23, 0 +; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] +; GFX9-NEXT: s_sub_i32 s26, s22, 64 +; GFX9-NEXT: s_sub_i32 s24, 64, s22 +; GFX9-NEXT: s_cmp_lt_u32 s22, 64 +; GFX9-NEXT: s_cselect_b32 s27, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s22, 0 +; GFX9-NEXT: s_cselect_b32 s28, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 +; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 +; GFX9-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 +; GFX9-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] +; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 +; GFX9-NEXT: s_and_b32 s24, s27, 1 +; GFX9-NEXT: s_cmp_lg_u32 s24, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] +; GFX9-NEXT: s_and_b32 s22, s28, 1 +; GFX9-NEXT: s_cmp_lg_u32 s22, 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] +; GFX9-NEXT: s_cmp_lg_u32 s24, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] +; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] +; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s11, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[18:19], s[4:5], s31 +; GFX9-NEXT: s_lshl_b64 s[20:21], s[6:7], 1 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 +; GFX9-NEXT: s_or_b64 s[18:19], s[18:19], s[20:21] +; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], s30 +; GFX9-NEXT: s_and_b32 s9, s9, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[18:19], s[4:5] +; GFX9-NEXT: s_and_b32 s9, s11, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX9-NEXT: s_sub_i32 s9, s10, 64 +; GFX9-NEXT: s_sub_i32 s11, 64, s10 +; GFX9-NEXT: s_cmp_lt_u32 s10, 64 +; GFX9-NEXT: s_cselect_b32 s20, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s10, 0 +; GFX9-NEXT: s_cselect_b32 s21, 1, 0 +; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 +; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 +; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 +; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] +; GFX9-NEXT: s_and_b32 s9, s20, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] +; GFX9-NEXT: s_and_b32 s9, s21, 1 +; GFX9-NEXT: s_cmp_lg_u32 s9, 0 +; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] +; GFX9-NEXT: s_sub_i32 s18, s8, 64 +; GFX9-NEXT: s_sub_i32 s16, 64, s8 +; GFX9-NEXT: s_cmp_lt_u32 s8, 64 +; GFX9-NEXT: s_cselect_b32 s19, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 s8, 0 +; GFX9-NEXT: s_cselect_b32 s20, 1, 0 +; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 +; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 +; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 +; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] +; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 +; GFX9-NEXT: s_and_b32 s16, s19, 1 +; GFX9-NEXT: s_cmp_lg_u32 s16, 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] +; GFX9-NEXT: s_and_b32 s14, s20, 1 +; GFX9-NEXT: 
s_cmp_lg_u32 s14, 0 +; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] +; GFX9-NEXT: s_cmp_lg_u32 s16, 0 +; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 +; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] +; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_fshr_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_movk_i32 s18, 0x7f +; GFX10-NEXT: s_mov_b32 s19, 0 +; GFX10-NEXT: s_sub_i32 s30, 1, 64 +; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] +; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] +; GFX10-NEXT: s_sub_i32 s31, 64, 1 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: s_mov_b32 s62, s10 +; GFX10-NEXT: s_cselect_b32 s17, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_mov_b32 s63, s11 +; GFX10-NEXT: s_cselect_b32 s23, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s31 +; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], 1 +; GFX10-NEXT: s_and_b32 s17, s17, 1 +; GFX10-NEXT: s_lshl_b64 s[28:29], s[0:1], 1 +; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] +; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s30 +; GFX10-NEXT: s_cmp_lg_u32 s17, 0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_cselect_b64 s[26:27], s[28:29], 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] +; GFX10-NEXT: s_and_b32 s17, s23, 1 +; GFX10-NEXT: s_cmp_lg_u32 s17, 0 +; GFX10-NEXT: s_cselect_b64 s[46:47], s[2:3], s[0:1] +; GFX10-NEXT: s_sub_i32 s23, s16, 64 +; GFX10-NEXT: s_sub_i32 s2, 64, s16 +; GFX10-NEXT: s_cmp_lt_u32 s16, 64 +; GFX10-NEXT: s_cselect_b32 s28, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s16, 0 +; GFX10-NEXT: s_cselect_b32 s29, 1, 0 +; GFX10-NEXT: s_lshl_b64 s[24:25], s[46:47], s16 +; GFX10-NEXT: s_lshr_b64 s[2:3], s[26:27], s2 +; GFX10-NEXT: s_and_b32 s28, s28, 1 +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] +; GFX10-NEXT: s_lshl_b64 s[16:17], s[26:27], s16 +; GFX10-NEXT: s_lshl_b64 s[24:25], s[26:27], s23 +; GFX10-NEXT: s_cmp_lg_u32 s28, 0 +; GFX10-NEXT: s_cselect_b64 s[78:79], s[16:17], 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[24:25] +; GFX10-NEXT: s_and_b32 s23, s29, 1 +; GFX10-NEXT: s_cmp_lg_u32 s23, 0 +; GFX10-NEXT: s_cselect_b64 s[2:3], s[46:47], s[2:3] +; GFX10-NEXT: s_sub_i32 s26, s22, 64 +; GFX10-NEXT: s_sub_i32 s23, 64, s22 +; GFX10-NEXT: s_cmp_lt_u32 s22, 64 +; GFX10-NEXT: s_cselect_b32 s27, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s22, 0 +; GFX10-NEXT: s_cselect_b32 s28, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 +; GFX10-NEXT: s_lshl_b64 s[24:25], s[62:63], s23 +; GFX10-NEXT: s_lshr_b64 s[22:23], s[62:63], s22 +; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] +; GFX10-NEXT: s_and_b32 s24, s27, 1 +; GFX10-NEXT: s_lshr_b64 s[10:11], s[62:63], s26 +; GFX10-NEXT: s_cmp_lg_u32 s24, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] +; GFX10-NEXT: s_and_b32 s10, s28, 1 +; GFX10-NEXT: s_cmp_lg_u32 s10, 0 +; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] +; GFX10-NEXT: s_cmp_lg_u32 s24, 0 +; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 +; GFX10-NEXT: s_or_b64 s[0:1], s[78:79], s[0:1] +; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] +; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] +; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: s_cselect_b32 s9, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: s_cselect_b32 s11, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s31 +; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], 1 +; GFX10-NEXT: s_and_b32 s9, s9, 1 +; GFX10-NEXT: s_lshl_b64 s[20:21], s[4:5], 1 +; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; 
GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s30 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[18:19], s[20:21], 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] +; GFX10-NEXT: s_and_b32 s9, s11, 1 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX10-NEXT: s_sub_i32 s9, s10, 64 +; GFX10-NEXT: s_sub_i32 s6, 64, s10 +; GFX10-NEXT: s_cmp_lt_u32 s10, 64 +; GFX10-NEXT: s_cselect_b32 s20, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 0 +; GFX10-NEXT: s_cselect_b32 s21, 1, 0 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[4:5], s10 +; GFX10-NEXT: s_lshr_b64 s[6:7], s[18:19], s6 +; GFX10-NEXT: s_and_b32 s20, s20, 1 +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[16:17] +; GFX10-NEXT: s_lshl_b64 s[10:11], s[18:19], s10 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[18:19], s9 +; GFX10-NEXT: s_cmp_lg_u32 s20, 0 +; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 +; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[16:17] +; GFX10-NEXT: s_and_b32 s9, s21, 1 +; GFX10-NEXT: s_cmp_lg_u32 s9, 0 +; GFX10-NEXT: s_cselect_b64 s[6:7], s[4:5], s[6:7] +; GFX10-NEXT: s_sub_i32 s18, s8, 64 +; GFX10-NEXT: s_sub_i32 s9, 64, s8 +; GFX10-NEXT: s_cmp_lt_u32 s8, 64 +; GFX10-NEXT: s_cselect_b32 s19, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cselect_b32 s20, 1, 0 +; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8 +; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9 +; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8 +; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17] +; GFX10-NEXT: s_and_b32 s16, s19, 1 +; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 +; GFX10-NEXT: s_cmp_lg_u32 s16, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15] +; GFX10-NEXT: s_and_b32 s14, s20, 1 +; GFX10-NEXT: s_cmp_lg_u32 s14, 0 +; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] +; GFX10-NEXT: s_cmp_lg_u32 s16, 0 +; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 +; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] +; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX10-NEXT: ; return to shader part epilog + %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) + ret <2 x i128> %result +} + +define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { +; GFX6-LABEL: v_fshr_v2i128: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_sub_i32 s6, 64, 1 +; GFX6-NEXT: s_sub_i32 s7, 1, 64 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], s6 +; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], 1 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: v_or_b32_e32 v19, v17, v21 +; GFX6-NEXT: v_or_b32_e32 v21, v18, v22 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: v_lshl_b64 v[17:18], v[0:1], 1 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s7 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s5 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v19, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GFX6-NEXT: s_movk_i32 s8, 0x7f +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v16 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v21, vcc +; GFX6-NEXT: v_and_b32_e32 v19, s8, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v18, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v19 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[17:18], v2 +; GFX6-NEXT: v_lshl_b64 v[21:22], v[0:1], v19 +; GFX6-NEXT: v_and_b32_e32 v25, s8, v16 +; GFX6-NEXT: 
v_or_b32_e32 v23, v2, v21 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v25 +; GFX6-NEXT: v_or_b32_e32 v24, v3, v22 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[10:11], v2 +; GFX6-NEXT: v_lshr_b64 v[21:22], v[8:9], v25 +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 +; GFX6-NEXT: v_or_b32_e32 v21, v21, v2 +; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v19 +; GFX6-NEXT: v_or_b32_e32 v22, v22, v3 +; GFX6-NEXT: v_lshl_b64 v[2:3], v[17:18], v2 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v23, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v24, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v1, s[4:5] +; GFX6-NEXT: v_subrev_i32_e64 v0, s[4:5], 64, v25 +; GFX6-NEXT: v_lshl_b64 v[16:17], v[17:18], v19 +; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v0 +; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 +; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] +; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v25 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 +; GFX6-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] +; GFX6-NEXT: v_or_b32_e32 v0, v16, v8 +; GFX6-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20 +; GFX6-NEXT: v_or_b32_e32 v1, v17, v9 +; GFX6-NEXT: v_and_b32_e32 v17, s8, v8 +; GFX6-NEXT: s_cmp_lt_u32 1, 64 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], s6 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], 1 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 +; GFX6-NEXT: s_cmp_eq_u32 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1 +; GFX6-NEXT: v_lshl_b64 v[4:5], v[4:5], s7 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: s_and_b32 s4, 1, s5 +; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v17 +; GFX6-NEXT: v_lshr_b64 v[6:7], v[8:9], v6 +; GFX6-NEXT: v_lshl_b64 v[10:11], v[4:5], v17 +; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, 64, v17 +; GFX6-NEXT: v_or_b32_e32 v10, v6, v10 +; GFX6-NEXT: v_or_b32_e32 v11, v7, v11 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[8:9], v17 +; GFX6-NEXT: v_lshl_b64 v[8:9], v[8:9], v18 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 +; GFX6-NEXT: v_and_b32_e32 v16, s8, v20 +; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v10, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v7, v9, v11, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 +; GFX6-NEXT: v_cndmask_b32_e32 v8, v6, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v9, v7, v5, vcc +; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v16 +; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v16 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6 +; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 64, v16 +; GFX6-NEXT: v_or_b32_e32 v11, v4, v6 +; GFX6-NEXT: v_or_b32_e32 v17, v5, v7 +; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], v10 +; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 
+; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v16 +; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 +; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc +; GFX6-NEXT: v_or_b32_e32 v4, v18, v6 +; GFX6-NEXT: v_or_b32_e32 v5, v19, v7 +; GFX6-NEXT: v_or_b32_e32 v6, v8, v10 +; GFX6-NEXT: v_or_b32_e32 v7, v9, v11 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_fshr_v2i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_i32 s6, 64, 1 +; GFX8-NEXT: s_sub_i32 s7, 1, 64 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: v_lshrrev_b64 v[17:18], s6, v[0:1] +; GFX8-NEXT: v_lshlrev_b64 v[21:22], 1, v[2:3] +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: v_or_b32_e32 v19, v17, v21 +; GFX8-NEXT: v_or_b32_e32 v21, v18, v22 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: v_lshlrev_b64 v[17:18], 1, v[0:1] +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_lshlrev_b64 v[0:1], s7, v[0:1] +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s5 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v19, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GFX8-NEXT: s_movk_i32 s8, 0x7f +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v21, vcc +; GFX8-NEXT: v_and_b32_e32 v19, s8, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v18, vcc +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v19 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, v[17:18] +; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[0:1] +; GFX8-NEXT: v_and_b32_e32 v25, s8, v16 +; GFX8-NEXT: v_or_b32_e32 v23, v2, v21 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v25 +; GFX8-NEXT: v_or_b32_e32 v24, v3, v22 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11] +; GFX8-NEXT: v_lshrrev_b64 v[21:22], v25, v[8:9] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 +; GFX8-NEXT: v_or_b32_e32 v21, v21, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v19 +; GFX8-NEXT: v_or_b32_e32 v22, v22, v3 +; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[17:18] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v23, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v24, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v1, s[4:5] +; GFX8-NEXT: v_subrev_u32_e64 v0, s[4:5], 64, v25 +; GFX8-NEXT: v_lshlrev_b64 v[16:17], v19, v[17:18] +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11] +; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 +; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] +; GFX8-NEXT: v_lshrrev_b64 v[0:1], v25, v[10:11] +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 +; GFX8-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] +; GFX8-NEXT: v_or_b32_e32 v0, v16, v8 +; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc +; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20 +; GFX8-NEXT: v_or_b32_e32 v1, v17, v9 +; GFX8-NEXT: v_and_b32_e32 v17, s8, v8 +; GFX8-NEXT: s_cmp_lt_u32 1, 64 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX8-NEXT: 
v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_lshrrev_b64 v[8:9], s6, v[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], 1, v[6:7] +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 +; GFX8-NEXT: s_cmp_eq_u32 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] +; GFX8-NEXT: v_lshlrev_b64 v[4:5], s7, v[4:5] +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: s_and_b32 s4, 1, s5 +; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v17 +; GFX8-NEXT: v_lshrrev_b64 v[6:7], v6, v[8:9] +; GFX8-NEXT: v_lshlrev_b64 v[10:11], v17, v[4:5] +; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, 64, v17 +; GFX8-NEXT: v_or_b32_e32 v10, v6, v10 +; GFX8-NEXT: v_or_b32_e32 v11, v7, v11 +; GFX8-NEXT: v_lshlrev_b64 v[6:7], v17, v[8:9] +; GFX8-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 +; GFX8-NEXT: v_and_b32_e32 v16, s8, v20 +; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v7, v9, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v8, v6, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, v7, v5, vcc +; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v16 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] +; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 64, v16 +; GFX8-NEXT: v_or_b32_e32 v11, v4, v6 +; GFX8-NEXT: v_or_b32_e32 v17, v5, v7 +; GFX8-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] +; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 +; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] +; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc +; GFX8-NEXT: v_or_b32_e32 v4, v18, v6 +; GFX8-NEXT: v_or_b32_e32 v5, v19, v7 +; GFX8-NEXT: v_or_b32_e32 v6, v8, v10 +; GFX8-NEXT: v_or_b32_e32 v7, v9, v11 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_fshr_v2i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_i32 s6, 64, 1 +; GFX9-NEXT: s_sub_i32 s7, 1, 64 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: v_lshrrev_b64 v[17:18], s6, v[0:1] +; GFX9-NEXT: v_lshlrev_b64 v[21:22], 1, v[2:3] +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: v_or_b32_e32 v19, v17, v21 +; GFX9-NEXT: v_or_b32_e32 v21, v18, v22 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: v_lshlrev_b64 v[17:18], 1, v[0:1] +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_lshlrev_b64 v[0:1], s7, v[0:1] +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v19, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GFX9-NEXT: s_movk_i32 s8, 0x7f +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v21, vcc +; GFX9-NEXT: 
v_and_b32_e32 v19, s8, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v18, vcc +; GFX9-NEXT: v_sub_u32_e32 v2, 64, v19 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, v[17:18] +; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[0:1] +; GFX9-NEXT: v_and_b32_e32 v25, s8, v16 +; GFX9-NEXT: v_or_b32_e32 v23, v2, v21 +; GFX9-NEXT: v_sub_u32_e32 v2, 64, v25 +; GFX9-NEXT: v_or_b32_e32 v24, v3, v22 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11] +; GFX9-NEXT: v_lshrrev_b64 v[21:22], v25, v[8:9] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 +; GFX9-NEXT: v_or_b32_e32 v21, v21, v2 +; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v19 +; GFX9-NEXT: v_or_b32_e32 v22, v22, v3 +; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[17:18] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v23, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v24, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] +; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v25 +; GFX9-NEXT: v_lshlrev_b64 v[16:17], v19, v[17:18] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v1, s[4:5] +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11] +; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 +; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] +; GFX9-NEXT: v_lshrrev_b64 v[0:1], v25, v[10:11] +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 +; GFX9-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] +; GFX9-NEXT: v_or_b32_e32 v0, v16, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20 +; GFX9-NEXT: v_or_b32_e32 v1, v17, v9 +; GFX9-NEXT: v_and_b32_e32 v17, s8, v8 +; GFX9-NEXT: s_cmp_lt_u32 1, 64 +; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX9-NEXT: v_lshrrev_b64 v[8:9], s6, v[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[6:7] +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 +; GFX9-NEXT: s_cmp_eq_u32 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 +; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] +; GFX9-NEXT: v_lshlrev_b64 v[4:5], s7, v[4:5] +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: s_and_b32 s4, 1, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GFX9-NEXT: v_sub_u32_e32 v6, 64, v17 +; GFX9-NEXT: v_lshrrev_b64 v[6:7], v6, v[8:9] +; GFX9-NEXT: v_lshlrev_b64 v[10:11], v17, v[4:5] +; GFX9-NEXT: v_subrev_u32_e32 v18, 64, v17 +; GFX9-NEXT: v_or_b32_e32 v10, v6, v10 +; GFX9-NEXT: v_or_b32_e32 v11, v7, v11 +; GFX9-NEXT: v_lshlrev_b64 v[6:7], v17, v[8:9] +; GFX9-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 +; GFX9-NEXT: v_and_b32_e32 v16, s8, v20 +; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, v9, v11, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v8, v6, v4, vcc +; GFX9-NEXT: 
v_sub_u32_e32 v6, 64, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v7, v5, vcc +; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] +; GFX9-NEXT: v_subrev_u32_e32 v10, 64, v16 +; GFX9-NEXT: v_or_b32_e32 v11, v4, v6 +; GFX9-NEXT: v_or_b32_e32 v17, v5, v7 +; GFX9-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] +; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 +; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc +; GFX9-NEXT: v_or_b32_e32 v4, v18, v6 +; GFX9-NEXT: v_or_b32_e32 v5, v19, v7 +; GFX9-NEXT: v_or_b32_e32 v6, v8, v10 +; GFX9-NEXT: v_or_b32_e32 v7, v9, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_fshr_v2i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_sub_i32 s5, 64, 1 +; GFX10-NEXT: s_sub_i32 s6, 1, 64 +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_lshrrev_b64 v[27:28], s5, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[21:22], 1, v[2:3] +; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[23:24], 1, v[0:1] +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 +; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] +; GFX10-NEXT: v_xor_b32_e32 v19, -1, v16 +; GFX10-NEXT: v_or_b32_e32 v21, v27, v21 +; GFX10-NEXT: v_or_b32_e32 v18, v28, v22 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 +; GFX10-NEXT: s_movk_i32 s7, 0x7f +; GFX10-NEXT: s_and_b32 s8, 1, s8 +; GFX10-NEXT: v_and_b32_e32 v31, s7, v19 +; GFX10-NEXT: v_cndmask_b32_e32 v17, 0, v23, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v18, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v21, s4 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 +; GFX10-NEXT: v_cndmask_b32_e32 v18, 0, v24, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v19, 64, v31 +; GFX10-NEXT: v_and_b32_e32 v26, s7, v16 +; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v31 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v3, s4 +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v19, v[17:18] +; GFX10-NEXT: v_mov_b32_e32 v35, v10 +; GFX10-NEXT: v_mov_b32_e32 v36, v11 +; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v26 +; GFX10-NEXT: v_lshlrev_b64 v[21:22], v31, v[0:1] +; GFX10-NEXT: v_lshlrev_b64 v[23:24], v31, v[17:18] +; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v31 +; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v26 +; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26 +; GFX10-NEXT: v_lshrrev_b64 v[27:28], s5, v[4:5] +; GFX10-NEXT: v_or_b32_e32 v21, v2, v21 +; GFX10-NEXT: v_or_b32_e32 v22, v3, v22 +; GFX10-NEXT: v_lshlrev_b64 v[2:3], v16, v[17:18] +; GFX10-NEXT: v_lshlrev_b64 v[18:19], v25, v[35:36] +; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] +; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v39, 0, v24, vcc_lo +; GFX10-NEXT: s_cmp_lt_u32 1, 64 +; GFX10-NEXT: v_cndmask_b32_e32 v21, v2, v21, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v22, v3, v22, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v29, v[35:36] +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v31 +; GFX10-NEXT: v_or_b32_e32 v16, v16, v18 +; GFX10-NEXT: v_or_b32_e32 v17, v17, v19 +; GFX10-NEXT: v_lshlrev_b64 v[10:11], 1, v[6:7] +; GFX10-NEXT: s_cselect_b32 s5, 1, 0 +; 
GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v31, v22, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v16, s4 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 +; GFX10-NEXT: s_cmp_eq_u32 1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v19, v3, v17, s4 +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[16:17], 1, v[4:5] +; GFX10-NEXT: v_lshlrev_b64 v[4:5], s6, v[4:5] +; GFX10-NEXT: s_and_b32 s6, 1, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v21, v2, v8, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v25, -1, v20 +; GFX10-NEXT: v_or_b32_e32 v2, v27, v10 +; GFX10-NEXT: v_or_b32_e32 v3, v28, v11 +; GFX10-NEXT: v_cmp_ne_u32_e64 s6, 0, s6 +; GFX10-NEXT: s_and_b32 s8, 1, s8 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v26, v[35:36] +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v16, s5 +; GFX10-NEXT: v_cndmask_b32_e32 v16, v19, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v11, v4, v2, s6 +; GFX10-NEXT: v_and_b32_e32 v30, s7, v25 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, v3, s6 +; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v17, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, v0, s4 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v30 +; GFX10-NEXT: v_or_b32_e32 v0, v23, v21 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v11, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo +; GFX10-NEXT: v_and_b32_e32 v23, s7, v20 +; GFX10-NEXT: v_lshrrev_b64 v[5:6], v2, v[8:9] +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 64, v30 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v1, s4 +; GFX10-NEXT: v_lshlrev_b64 v[10:11], v30, v[3:4] +; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23 +; GFX10-NEXT: v_or_b32_e32 v1, v39, v16 +; GFX10-NEXT: v_or_b32_e32 v2, v18, v19 +; GFX10-NEXT: v_lshlrev_b64 v[16:17], v30, v[8:9] +; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] +; GFX10-NEXT: v_or_b32_e32 v10, v5, v10 +; GFX10-NEXT: v_subrev_nc_u32_e32 v5, 64, v23 +; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] +; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v30 +; GFX10-NEXT: v_lshlrev_b64 v[7:8], v7, v[8:9] +; GFX10-NEXT: v_or_b32_e32 v9, v6, v11 +; GFX10-NEXT: v_lshrrev_b64 v[34:35], v5, v[14:15] +; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23 +; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo +; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 +; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 +; GFX10-NEXT: v_cndmask_b32_e32 v10, v7, v10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[7:8], v23, v[14:15] +; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v30 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v34, v16, s4 +; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v35, v18, s4 +; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v15, v10, v3, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v4, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, v12, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v13, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v7, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, v8, s4 +; GFX10-NEXT: v_or_b32_e32 v3, v31, v26 +; GFX10-NEXT: v_or_b32_e32 v4, v11, v4 +; GFX10-NEXT: v_or_b32_e32 v5, v14, v5 +; GFX10-NEXT: v_or_b32_e32 v6, v15, v6 +; GFX10-NEXT: v_or_b32_e32 v7, v9, v7 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_setpc_b64 s[30:31] + %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) + ret <2 x i128> %result +} + +declare i7 @llvm.fshr.i7(i7, i7, i7) #0 +declare i8 @llvm.fshr.i8(i8, i8, i8) #0 +declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0 +declare 
<4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0 + +declare i16 @llvm.fshr.i16(i16, i16, i16) #0 +declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0 +declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0 +declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0 +declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0 +declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0 +declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0 + +declare i24 @llvm.fshr.i24(i24, i24, i24) #0 +declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0 + +declare i32 @llvm.fshr.i32(i32, i32, i32) #0 +declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0 +declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0 +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0 +declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0 +declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0 + +declare i48 @llvm.fshr.i48(i48, i48, i48) #0 + +declare i64 @llvm.fshr.i64(i64, i64, i64) #0 +declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0 + +declare i128 @llvm.fshr.i128(i128, i128, i128) #0 +declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0 + +attributes #0 = { nounwind readnone speculatable willreturn } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -0,0 +1,1755 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: test_fshl_s32_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; SI-LABEL: name: test_fshl_s32_s32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[AND]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; SI: $vgpr0 = COPY [[SELECT]](s32) + ; VI-LABEL: name: test_fshl_s32_s32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[AND]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) + ; VI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; VI: $vgpr0 = COPY [[SELECT]](s32) + ; GFX9-LABEL: name: test_fshl_s32_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[SUB]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; GFX9: $vgpr0 = COPY [[SELECT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_FSHL %0, %1, %2 + $vgpr0 = COPY %3 +... + +--- +name: test_fshl_v2s32_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; SI-LABEL: name: test_fshl_v2s32_v2s32 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32) + ; SI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY2]], [[BUILD_VECTOR]] + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV1]] + ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV4]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV3]], [[UV5]](s32) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) + ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[SUB]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[SUB1]](s32) + ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) + ; SI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV8]](s32), [[C1]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV9]](s32), [[C1]] + ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV12]] + ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV11]], [[UV13]] + ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR3]](<2 x s32>) + ; VI-LABEL: name: test_fshl_v2s32_v2s32 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32) + ; VI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY2]], [[BUILD_VECTOR]] + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV1]] + ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV4]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV3]], [[UV5]](s32) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) + ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[SUB]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[SUB1]](s32) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) + ; VI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV8]](s32), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV9]](s32), [[C1]] + ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV12]] + ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV11]], [[UV13]] + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR3]](<2 x s32>) + ; GFX9-LABEL: name: test_fshl_v2s32_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY2]], [[BUILD_VECTOR]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV1]] + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[UV4]](s32) + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV3]], [[UV5]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SHL]](s32), [[SHL1]](s32) + ; GFX9: 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[SUB]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[SUB1]](s32) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LSHR]](s32), [[LSHR1]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV8]](s32), [[C1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV9]](s32), [[C1]] + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[UV10]], [[UV12]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[UV11]], [[UV13]] + ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR3]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x s32>) = G_FSHL %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: test_fshl_s16_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; SI-LABEL: name: test_fshl_s16_s16 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[ANYEXT]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[ZEXT]](s32) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[C3]] + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[OR]] + ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT1]](s32) + ; VI-LABEL: name: test_fshl_s16_s16 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI: 
[[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[AND]] + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[SUB]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s16), [[C1]] + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[OR]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fshl_s16_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[SUB]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s16), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[OR]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(s16) = G_FSHL %3, %4, %5 + %7:_(s32) = G_ANYEXT %6 + $vgpr0 = COPY %7 +... 
+ +--- +name: test_fshl_v2s16_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; SI-LABEL: name: test_fshl_v2s16_v2s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BITCAST]] + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY6]] + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[AND2]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[AND5]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C2]] + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[AND7]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL4]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST4]], [[BITCAST6]] + ; SI: 
[[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND11]](s32), [[COPY20]] + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND12]](s32), [[C3]] + ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC2]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; VI-LABEL: name: test_fshl_v2s16_v2s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BITCAST]] + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC4]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC5]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; VI: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C3]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[SUB]](s16) + ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[SUB1]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST4]], [[BITCAST6]] + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C3]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC8]](s16), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC9]](s16), [[C1]] + ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C3]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C3]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC10]], [[TRUNC12]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC11]], [[TRUNC13]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL5]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; GFX9-LABEL: name: test_fshl_v2s16_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[SUB]](<2 x s16>) + ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[C1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC1]](s16), [[C1]] + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC4]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC3]], [[TRUNC5]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = G_FSHL %0, %1, %2 + $vgpr0 = COPY %3 +... + +--- +name: test_fshl_s64_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; SI-LABEL: name: test_fshl_s64_s64 + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; SI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; SI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[USUBO]](s32) + ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-LABEL: name: test_fshl_s64_s64 + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; VI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; VI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = 
G_TRUNC [[AND]](s64) + ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[USUBO]](s32) + ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-LABEL: name: test_fshl_s64_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[USUBO]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[OR]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = COPY $vgpr4_vgpr5 + %3:_(s64) = G_FSHL %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... 
+ +--- +name: test_fshl_s8_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; SI-LABEL: name: test_fshl_s8_s8 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_fshl_s8_s8 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC2]](s16) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC4]](s16) + ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; 
VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC5]], [[TRUNC6]] + ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT3]](s32) + ; GFX9-LABEL: name: test_fshl_s8_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC2]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC4]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC5]], [[TRUNC6]] + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT3]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s8) = G_TRUNC %0 + %4:_(s8) = G_TRUNC %1 + %5:_(s8) = G_TRUNC %2 + %6:_(s8) = G_FSHL %3, %4, %5 + %7:_(s32) = G_ANYEXT %6 + $vgpr0 = COPY %7 +... 
+ +--- +name: test_fshl_s24_s24 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; SI-LABEL: name: test_fshl_s24_s24 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; SI: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; SI: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; SI: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; SI: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; SI: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; SI: $vgpr0 = COPY [[COPY17]](s32) + ; VI-LABEL: name: test_fshl_s24_s24 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; VI: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; VI: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; VI: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; VI: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; VI: 
[[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; VI: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; VI: $vgpr0 = COPY [[COPY17]](s32) + ; GFX9-LABEL: name: test_fshl_s24_s24 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; GFX9: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY17]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s24) = G_TRUNC %0 + %4:_(s24) = G_TRUNC %1 + %5:_(s24) = G_TRUNC %2 + %6:_(s24) = G_FSHL %3, %4, %5 + %7:_(s32) = G_ANYEXT %6 + $vgpr0 = COPY %7 +... + +--- +name: test_fshl_v3s16_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; SI-LABEL: name: test_fshl_v3s16_v3s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; SI: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY9]], [[COPY10]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY13]] + ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[AND1]](s32) + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C3]] + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[AND2]](s32) + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C3]] + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[AND3]](s32) + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C3]] + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL5]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C3]] + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C1]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL6]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS4]](<4 x s16>), 0 + ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>) + ; SI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; SI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C1]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C3]] + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]] + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[AND7]](s32) + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]] + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[AND9]](s32) + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C3]] + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]] + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[AND11]](s32) + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]] + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C3]] + ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY33]], [[C1]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND15]], [[SHL8]] + ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS5]](<4 x s16>), 0 + ; SI: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0 + ; SI: [[OR6:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT6]], [[INSERT7]] + ; SI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR6]](<4 x s16>), 0 + ; SI: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<4 x s16>) + ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C1]](s32) + ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C1]](s32) + ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST14]](s32) + ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C3]] + ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND16]](s32), [[COPY35]] + ; SI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY36]], [[C3]] + ; SI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND17]](s32), [[COPY37]] + ; SI: [[COPY38:%[0-9]+]]:_(s32) = 
COPY [[BITCAST15]](s32) + ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C3]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND18]](s32), [[C2]] + ; SI: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<4 x s16>) + ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST16]](s32) + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST17]](s32) + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C1]](s32) + ; SI: [[INSERT10:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0 + ; SI: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT10]](<4 x s16>) + ; SI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST18]](s32) + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C1]](s32) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C1]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]] + ; SI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] + ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; SI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY39]], [[C1]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL10]] + ; SI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; SI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS6]](<4 x s16>), 0 + ; SI: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT5]](<3 x s16>), [[EXTRACT6]](<3 x s16>) + ; SI: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS7]](<6 x s16>) + ; SI: $vgpr0 = COPY [[UV20]](<2 x s16>) + ; SI: $vgpr1 = COPY [[UV21]](<2 x s16>) + ; SI: $vgpr2 = COPY [[UV22]](<2 x s16>) + ; VI-LABEL: name: test_fshl_v3s16_v3s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x 
s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; VI: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC2]] + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C3]](s32) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), 
[[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT4]](<4 x s16>) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C3]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C3]](s32) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC6]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[TRUNC7]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[TRUNC8]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL4]](s16) + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS4]](<4 x s16>), 0 + ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>) + ; VI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C3]](s32) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C3]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[SUB]](s16) + ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[SUB1]](s16) + ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[SUB2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL7]] + ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C3]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL8]] + ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS5]](<4 x s16>), 0 + ; VI: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0 + ; VI: [[OR6:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT6]], [[INSERT7]] + ; VI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR6]](<4 x s16>), 0 + ; VI: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x 
s16>), 0 + ; VI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT8]](<4 x s16>) + ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST14]](s32) + ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C3]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST15]](s32) + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C3]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC12]](s16), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC13]](s16), [[C1]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC14]](s16), [[C1]] + ; VI: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT9]](<4 x s16>) + ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST16]](s32) + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C3]](s32) + ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST17]](s32) + ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C3]](s32) + ; VI: [[INSERT10:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0 + ; VI: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT10]](<4 x s16>) + ; VI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) + ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST18]](s32) + ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C3]](s32) + ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C3]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC15]], [[TRUNC18]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC16]], [[TRUNC19]] + ; VI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC17]], [[TRUNC20]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C3]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL9]] + ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C3]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL10]] + ; VI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI: [[CONCAT_VECTORS6:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; VI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS6]](<4 x s16>), 0 + ; VI: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS7:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT5]](<3 x s16>), [[EXTRACT6]](<3 x s16>) + ; VI: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS7]](<6 x s16>) + ; VI: $vgpr0 = COPY [[UV20]](<2 x s16>) + ; VI: $vgpr1 = COPY [[UV21]](<2 x s16>) + ; VI: $vgpr2 = COPY [[UV22]](<2 x s16>) + ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[DEF1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<4 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[DEF1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C2]](s32), [[COPY11]](s32) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 
x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF1]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] + ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SUB]](<2 x s16>), [[SUB1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS4]](<6 x s16>), 0 + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT6]](<4 x s16>), 32 + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT3]], [[EXTRACT5]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT4]], [[EXTRACT6]](s16) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF3]](<4 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT7]], [[SHL]](<2 x s16>), 0 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT8]](<4 x s16>), 0 + ; GFX9: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: [[INSERT10:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT9]], [[SHL1]](s16), 32 + ; GFX9: [[EXTRACT9:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT10]](<4 x s16>), 0 + ; GFX9: [[INSERT11:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT10:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT11]](<4 x s16>), 0 + ; GFX9: [[INSERT12:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT11:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT12]](<4 x s16>), 32 + ; GFX9: [[INSERT13:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT12:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT13]](<4 x s16>), 0 + ; GFX9: [[INSERT14:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT13:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT14]](<4 x s16>), 32 + ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT10]], [[EXTRACT12]](<2 x s16>) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT11]], [[EXTRACT13]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT14:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF4]](<4 x s16>), 0 + ; GFX9: [[INSERT15:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT14]](<3 x s16>), 0 + ; GFX9: [[INSERT16:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT15]], [[LSHR2]](<2 x s16>), 0 + ; GFX9: [[EXTRACT15:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT16]](<4 x s16>), 0 + ; GFX9: [[INSERT17:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT15]](<3 x s16>), 0 + ; GFX9: [[INSERT18:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT17]], 
[[LSHR3]](s16), 32 + ; GFX9: [[EXTRACT16:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT18]](<4 x s16>), 0 + ; GFX9: [[INSERT19:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT9]](<3 x s16>), 0 + ; GFX9: [[INSERT20:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT16]](<3 x s16>), 0 + ; GFX9: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT19]], [[INSERT20]] + ; GFX9: [[EXTRACT17:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR]](<4 x s16>), 0 + ; GFX9: [[INSERT21:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT21]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[C]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC1]](s16), [[C]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC2]](s16), [[C]] + ; GFX9: [[INSERT22:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT22]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C2]](s32) + ; GFX9: [[INSERT23:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT17]](<3 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT23]](<4 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C2]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C2]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC3]], [[TRUNC6]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC4]], [[TRUNC7]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC5]], [[TRUNC8]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[DEF1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY15]](s32) + ; GFX9: [[CONCAT_VECTORS5:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x 
s16>) + ; GFX9: [[EXTRACT18:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS5]](<4 x s16>), 0 + ; GFX9: [[EXTRACT19:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS6:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT18]](<3 x s16>), [[EXTRACT19]](<3 x s16>) + ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS6]](<6 x s16>) + ; GFX9: $vgpr0 = COPY [[UV14]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV15]](<2 x s16>) + ; GFX9: $vgpr2 = COPY [[UV16]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = COPY $vgpr3 + %4:_(<2 x s16>) = COPY $vgpr4 + %5:_(<2 x s16>) = COPY $vgpr5 + %6:_(<2 x s16>) = G_IMPLICIT_DEF + %7:_(<6 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>), %6(<2 x s16>) + %8:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %7(<6 x s16>) + %10:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %6(<2 x s16>) + %11:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %10(<6 x s16>) + %13:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>) + %14:_(<3 x s16>), %15:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %16:_(<3 x s16>) = G_FSHL %8, %11, %14(<3 x s16>) + %17:_(<3 x s16>) = G_IMPLICIT_DEF + %18:_(<6 x s16>) = G_CONCAT_VECTORS %16(<3 x s16>), %17(<3 x s16>) + %19:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %18(<6 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + $vgpr2 = COPY %21(<2 x s16>) +... + +--- +name: test_fshl_v4s16_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; SI-LABEL: name: test_fshl_v4s16_v4s16 + ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; SI: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY2]], [[CONCAT_VECTORS]] + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY6]], [[COPY7]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY8]], [[COPY9]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[BITCAST3]](s32) + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY10]], [[COPY11]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY12]] + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[AND1]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[AND2]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[AND3]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[AND4]](s32) + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C2]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL6]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C2]] + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C2]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; SI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C1]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C2]] + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C2]] + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = 
G_LSHR [[AND10]], [[AND9]](s32) + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C2]] + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C2]] + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[AND11]](s32) + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C2]] + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C2]] + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[AND13]](s32) + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C2]] + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C2]] + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[AND15]](s32) + ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C2]] + ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C2]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[SHL8]] + ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C2]] + ; SI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY36]], [[C2]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C1]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL9]] + ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; SI: [[OR6:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C1]](s32) + ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C1]](s32) + ; SI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[BITCAST14]](s32) + ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY37]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND21]](s32), [[COPY38]] + ; SI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C2]] + ; SI: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND22]](s32), [[COPY40]] + ; SI: [[COPY41:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) + ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY41]], [[C2]] + ; SI: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND23]](s32), [[COPY42]] + ; SI: [[COPY43:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY43]], [[C2]] + ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND24]](s32), [[C3]] + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST16]](s32) + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) + ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST17]](s32) + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR6]](<4 x s16>) + ; SI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST18]](s32) + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C1]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C1]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC4]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC5]] + ; SI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC6]] + ; SI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[TRUNC3]], [[TRUNC7]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL10]] + ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL11]] + ; SI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS3]](<4 x s16>) + ; VI-LABEL: name: test_fshl_v4s16_v4s16 + ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; VI: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY2]], [[CONCAT_VECTORS]] + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; VI: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC2]] + ; VI: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC3]] + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C3]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C3]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C3]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[TRUNC8]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[TRUNC9]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[TRUNC10]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC7]], [[TRUNC11]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL3]](s16) + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL4]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SHL5]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C3]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C3]](s32) + ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC12]], [[SUB]](s16) + 
; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC13]], [[SUB1]](s16) + ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC14]], [[SUB2]](s16) + ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC15]], [[SUB3]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR10]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR11]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C3]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL9]] + ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; VI: [[OR6:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST14]](s32) + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C3]](s32) + ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST15]](s32) + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C3]](s32) + ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC16]](s16), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC17]](s16), [[C1]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC18]](s16), [[C1]] + ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC19]](s16), [[C1]] + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST16]](s32) + ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C3]](s32) + ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) + ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST17]](s32) + ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C3]](s32) + ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR6]](<4 x s16>) + ; VI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST18]](s32) + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C3]](s32) + ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C3]](s32) + ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC20]], [[TRUNC24]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC21]], [[TRUNC25]] + ; VI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC22]], [[TRUNC26]] + ; VI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), 
[[TRUNC23]], [[TRUNC27]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C3]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL10]] + ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C3]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL11]] + ; VI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS3]](<4 x s16>) + ; GFX9-LABEL: name: test_fshl_v4s16_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C1]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY2]], [[CONCAT_VECTORS]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C2]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC2]], [[UV]] + ; GFX9: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC3]], [[UV1]] + ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[UV4]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[UV5]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SHL]](<2 x s16>), [[SHL1]](<2 x s16>) + ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV6]], [[SUB]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV7]], [[SUB1]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LSHR]](<2 x s16>), [[LSHR1]](<2 x s16>) + ; GFX9: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + 
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[C]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC1]](s16), [[C]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC2]](s16), [[C]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC3]](s16), [[C]] + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C2]](s32) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C2]](s32) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC8]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC5]], [[TRUNC9]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC6]], [[TRUNC10]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[TRUNC7]], [[TRUNC11]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS3]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x s16>) = G_FSHL %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -90,12 +90,31 @@ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[ANYEXT]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[AND1]](s32) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[C3]] + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[OR]] + ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT1]](s32) ; VI-LABEL: name: test_fshr_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -103,8 +122,17 @@ ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[AND]] + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[SUB]](s16) + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s16), [[C1]] + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC1]], [[OR]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fshr_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -113,8 +141,17 @@ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) 
; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[SUB]](s16) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s16), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC1]], [[OR]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -137,35 +174,191 @@ ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<2 x s16>) - ; SI: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[UV]], [[UV2]], [[UV4]](s16) - ; SI: [[FSHR1:%[0-9]+]]:_(s16) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSHR]](s16), [[FSHR1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BITCAST]] + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY6]] + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[AND2]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[AND5]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C2]] + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[AND7]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL4]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST3]], [[BITCAST6]] + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND11]](s32), [[COPY20]] + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND12]](s32), [[C3]] + ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC2]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<2 x s16>) - ; 
VI: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[UV]], [[UV2]], [[UV4]](s16) - ; VI: [[FSHR1:%[0-9]+]]:_(s16) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSHR]](s16), [[FSHR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BITCAST]] + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[SUB]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[SUB1]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C3]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[TRUNC6]](s16) + ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[TRUNC7]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST3]], [[BITCAST6]] + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C3]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC8]](s16), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC9]](s16), [[C1]] + ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C3]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI: 
[[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C3]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC10]], [[TRUNC12]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC11]], [[TRUNC13]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL5]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY2]](<2 x s16>) - ; GFX9: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[UV]], [[UV2]], [[UV4]](s16) - ; GFX9: [[FSHR1:%[0-9]+]]:_(s16) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSHR]](s16), [[FSHR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[SUB]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) + ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[C1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC1]](s16), [[C1]] + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC4]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), 
[[TRUNC3]], [[TRUNC5]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -183,20 +376,59 @@ ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[FSHR]](s64) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; SI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; SI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[USUBO]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) + ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY1]], [[OR]] + ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_fshr_s64_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[FSHR]](s64) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; VI: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; VI: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[USUBO]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) + ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY1]], [[OR]] + ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_fshr_s64_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[FSHR]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C2]] + ; GFX9: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[USUBO]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY1]], [[OR]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -214,32 +446,114 @@ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; SI: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C3]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fshr_s8_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; VI: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR 
[[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC2]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC4]](s16) + ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC5]], [[TRUNC6]] + ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT3]](s32) ; GFX9-LABEL: name: test_fshr_s8_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[COPY4]] + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC2]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; 
GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC4]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND4]](s32), [[AND5]] + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC5]], [[TRUNC6]] + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -261,32 +575,185 @@ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s24) = G_TRUNC [[COPY1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s24) = G_TRUNC [[COPY2]](s32) - ; SI: [[FSHR:%[0-9]+]]:_(s24) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s24) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; SI: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; SI: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD 
[[SUB2]], [[AND1]] + ; SI: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; SI: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; SI: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; SI: $vgpr0 = COPY [[COPY17]](s32) ; VI-LABEL: name: test_fshr_s24_s24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s24) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s24) = G_TRUNC [[COPY2]](s32) - ; VI: [[FSHR:%[0-9]+]]:_(s24) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s24) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; VI: 
[[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; VI: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; VI: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; VI: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; VI: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; VI: $vgpr0 = COPY [[COPY17]](s32) ; GFX9-LABEL: name: test_fshr_s24_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s24) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s24) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[FSHR:%[0-9]+]]:_(s24) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s24) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C4]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY5]], [[COPY6]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND3]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND6]](s32), [[AND7]] + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[COPY15]], [[COPY16]] + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SELECT4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY17]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -297,3 +764,889 @@ %7:_(s32) = G_ANYEXT %6 $vgpr0 = COPY %7 ... 
+ +--- +name: test_fshr_v3s16_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; SI-LABEL: name: test_fshr_v3s16_v3s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], 
[[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL2]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[BITCAST6]], [[BITCAST8]] + ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY11]], [[COPY12]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY13]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[AND4]](s32) + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]] + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[AND5]](s32) + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C1]] + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C1]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C1]] + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C1]] + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[AND8]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C1]] + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C1]] + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[AND10]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C1]] + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C1]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[OR5:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST10]], [[BITCAST12]] + ; SI: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C]](s32) + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST13]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C1]] + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[AND14]](s32), [[COPY27]] + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C1]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND15]](s32), [[C2]] + ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[OR5]](<2 x s16>) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST14]](s32) + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; SI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY30]], [[C]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY29]], [[SHL8]] + ; SI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: [[AND16:%[0-9]+]]:_(<2 x s16>) = G_AND [[BITCAST7]], [[BITCAST16]] + ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[AND16]](<2 x s16>) + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[BITCAST17]](s32) + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY31]], [[COPY32]] + ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[COPY33]], [[COPY34]] + ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C1]] + ; SI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY36]], [[AND17]](s32) + ; SI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY37]], [[C1]] + ; SI: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[DEF3]], [[AND18]](s32) + ; SI: [[COPY38:%[0-9]+]]:_(s32) = COPY [[SHL9]](s32) + ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C1]] + ; SI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[SHL10]](s32) + ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C1]] + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]] + ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[AND16]](<2 x s16>) + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C]](s32) + ; SI: [[COPY40:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) + ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY40]], [[C1]] + ; SI: [[COPY41:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY41]], [[C1]] + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[AND21]](s32) + ; SI: [[COPY42:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY42]], [[C1]] + ; SI: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY43]], [[AND23]](s32) + ; SI: [[COPY44:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY44]], [[C1]] + ; SI: [[COPY45:%[0-9]+]]:_(s32) = 
COPY [[LSHR15]](s32) + ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY45]], [[C1]] + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND24]], [[SHL12]] + ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; SI: [[OR10:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST18]], [[BITCAST20]] + ; SI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[AND16]](<2 x s16>) + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C]](s32) + ; SI: [[COPY46:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) + ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY46]], [[C1]] + ; SI: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND26]](s32), [[COPY47]] + ; SI: [[COPY48:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY48]], [[C1]] + ; SI: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND27]](s32), [[COPY49]] + ; SI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[OR10]](<2 x s16>) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST22]](s32) + ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C]](s32) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; SI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]] + ; SI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[DEF2]], [[TRUNC6]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL13]] + ; SI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x s16>), [[BITCAST23]](<2 x s16>), [[DEF]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) + ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<6 x s16>) + ; SI: $vgpr0 = COPY [[UV12]](<2 x s16>) + ; SI: $vgpr1 = COPY [[UV13]](<2 x s16>) + ; SI: $vgpr2 = COPY [[UV14]](<2 x s16>) + ; VI-LABEL: name: test_fshr_v3s16_v3s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS2]](<6 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL2]] + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[BITCAST6]], [[BITCAST8]] + ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[TRUNC6]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB 
[[C3]], [[TRUNC7]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[SUB]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[SUB1]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL3]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC8]](s16) + ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[TRUNC9]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR8]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR9]](s16) + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]] + ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST10]], [[BITCAST12]] + ; VI: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[AND3]](<2 x s16>) + ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST13]](s32) + ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC10]](s16), [[C4]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC11]](s16), [[C4]] + ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[OR5]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST14]](s32) + ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC3]], [[TRUNC12]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC4]], [[TRUNC13]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]] + ; VI: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL8]] + ; VI: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[AND4:%[0-9]+]]:_(<2 x s16>) = G_AND [[BITCAST7]], [[BITCAST16]] + ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[AND4]](<2 x s16>) + ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST17]](s32) + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; VI: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[TRUNC14]] + ; VI: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[TRUNC15]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[SUB2]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[DEF2]], [[SUB3]](s16) + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[SHL9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[SHL10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR8]](s32) + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[AND4]](<2 x s16>) + ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C]](s32) + ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[TRUNC16]](s16) + ; VI: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[DEF2]], [[TRUNC17]](s16) + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR15]](s16) + ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL12]] + ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST18]], [[BITCAST20]] + ; VI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[AND4]](<2 x s16>) + ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST21]](s32) + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C]](s32) + ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC18]](s16), [[C4]] + ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC19]](s16), [[C4]] + ; VI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[OR10]](<2 x s16>) + ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST22]](s32) + ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C]](s32) + ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; VI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC5]], [[TRUNC20]] + ; VI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[DEF2]], [[TRUNC21]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL13]] + ; VI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST15]](<2 x s16>), [[BITCAST23]](<2 x s16>), [[DEF]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) + ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<6 x s16>) + ; VI: $vgpr0 = COPY [[UV12]](<2 x s16>) + ; VI: $vgpr1 = COPY [[UV13]](<2 x s16>) + ; VI: $vgpr2 = COPY [[UV14]](<2 x s16>) + ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV2]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[DEF3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF3]](s32) + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: 
[[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[C2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC6]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[SUB]](<2 x s16>) + ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[AND]](<2 x s16>) + ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR6]] + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC3]](s16), [[C1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC4]](s16), [[C1]] + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC5]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC6]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32) + ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] + ; GFX9: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC9]], [[AND1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[SUB1]](<2 x s16>) + ; GFX9: [[LSHR9:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND1]](<2 x s16>) + ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR9]] + ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[AND1]](<2 x s16>) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC7]](s16), [[C1]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC8]](s16), [[C1]] + ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC9]] + ; GFX9: 
[[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[DEF2]], [[TRUNC10]] + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<6 x s16>) + ; GFX9: $vgpr0 = COPY [[UV12]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV13]](<2 x s16>) + ; GFX9: $vgpr2 = COPY [[UV14]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = COPY $vgpr3 + %4:_(<2 x s16>) = COPY $vgpr4 + %5:_(<2 x s16>) = COPY $vgpr5 + %6:_(<2 x s16>) = G_IMPLICIT_DEF + %7:_(<6 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>), %6(<2 x s16>) + %8:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %7(<6 x s16>) + %10:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %6(<2 x s16>) + %11:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %10(<6 x s16>) + %13:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>) + %14:_(<3 x s16>), %15:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %16:_(<3 x s16>) = G_FSHR %8, %11, %14(<3 x s16>) + %17:_(<3 x s16>) = G_IMPLICIT_DEF + %18:_(<6 x s16>) = G_CONCAT_VECTORS %16(<3 x s16>), %17(<3 x s16>) + %19:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %18(<6 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + $vgpr2 = COPY %21(<2 x s16>) +... 
+ +--- +name: test_fshr_v4s16_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; SI-LABEL: name: test_fshr_v4s16_v4s16 + ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BITCAST]] + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY6]] + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[AND1]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[AND2]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[AND5]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C2]] + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C2]] + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[AND7]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[LSHR5]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL4]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST3]], [[BITCAST6]] + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND11]](s32), [[COPY20]] + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] + ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND12]](s32), [[C3]] + ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC2]] + ; SI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY23]], [[C1]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[SHL6]] + ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[AND13:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BITCAST11]] + ; SI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[AND13]](<2 x s16>) + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C1]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[BITCAST12]](s32) + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY24]], [[COPY25]] + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[COPY26]], [[COPY27]] + ; SI: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C1]](s32) + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C2]] + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST13]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY29]], [[AND14]](s32) + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C2]] + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY31]], [[AND15]](s32) + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[SHL7]](s32) + ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C2]] + ; SI: 
[[COPY33:%[0-9]+]]:_(s32) = COPY [[SHL8]](s32) + ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C2]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C1]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL9]] + ; SI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C1]](s32) + ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[AND13]](<2 x s16>) + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C1]](s32) + ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST16]](s32) + ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C2]] + ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) + ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C2]] + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[AND18]](s32) + ; SI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY36]], [[C2]] + ; SI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY37]], [[C2]] + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[AND20]](s32) + ; SI: [[COPY38:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C2]] + ; SI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C2]] + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C1]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND22]], [[SHL10]] + ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: [[OR8:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST14]], [[BITCAST17]] + ; SI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[AND13]](<2 x s16>) + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C1]](s32) + ; SI: [[COPY40:%[0-9]+]]:_(s32) = COPY [[BITCAST18]](s32) + ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY40]], [[C2]] + ; SI: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND24]](s32), [[COPY41]] + ; SI: [[COPY42:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY42]], [[C2]] + ; SI: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND25]](s32), [[COPY43]] + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C1]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; SI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[OR8]](<2 x s16>) + ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST20]](s32) + ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C1]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; SI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC4]], [[TRUNC6]] + ; SI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[TRUNC5]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL11]] + ; SI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-LABEL: name: test_fshr_v4s16_v4s16 + ; VI: 
[[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16 + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BITCAST]] + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC]] + ; VI: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[SUB]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[SUB1]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C3]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[TRUNC6]](s16) + ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[TRUNC7]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST3]], [[BITCAST6]] + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C3]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC8]](s16), [[C1]] + ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC9]](s16), [[C1]] + ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C3]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[OR3]](<2 x s16>) + ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C3]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; VI: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC10]], [[TRUNC12]] + ; VI: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC11]], [[TRUNC13]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL5]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL6]] + ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BITCAST11]] + ; VI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[AND1]](<2 x s16>) + ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C3]](s32) + ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; VI: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC14]] + ; VI: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC15]] + ; VI: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST13]](s32) + ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST13]], [[C3]](s32) + ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC16]], [[SUB2]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC17]], [[SUB3]](s16) + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[SHL7]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[SHL8]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C3]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL9]] + ; VI: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST15]](s32) + ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C3]](s32) + ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[AND1]](<2 x s16>) + ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST16]](s32) + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C3]](s32) + ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; VI: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC18]], [[TRUNC20]](s16) + ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC19]], [[TRUNC21]](s16) + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR13]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR14]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C3]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL10]] + ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[OR8:%[0-9]+]]:_(<2 x s16>) = G_OR [[BITCAST14]], [[BITCAST17]] + ; VI: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[AND1]](<2 x s16>) + ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST18]](s32) + ; VI: 
[[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C3]](s32) + ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC22]](s16), [[C1]] + ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC23]](s16), [[C1]] + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST19]](s32) + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C3]](s32) + ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; VI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[OR8]](<2 x s16>) + ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST20]](s32) + ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C3]](s32) + ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; VI: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC24]], [[TRUNC26]] + ; VI: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[TRUNC25]], [[TRUNC27]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C3]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL11]] + ; VI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST21]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-LABEL: name: test_fshr_v4s16_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[SUB:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC]], [[AND]] + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[SUB]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) + ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s16), [[C1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC1]](s16), [[C1]] + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC4]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC3]], [[TRUNC5]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9: [[SUB1:%[0-9]+]]:_(<2 x s16>) = G_SUB [[BUILD_VECTOR_TRUNC3]], [[AND1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[SUB1]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND1]](<2 x s16>) + ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR4]] + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[AND1]](<2 x s16>) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC6]](s16), [[C1]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC7]](s16), [[C1]] + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC8]], [[TRUNC10]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[TRUNC9]], [[TRUNC11]] + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x s16>) = G_FSHR %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +...
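
Reference note (not part of the patch): the expanded sequences checked above all reduce to the same per-lane computation. The shift amount is masked with the splatted 15 (G_AND), the complementary amount is formed as 16 minus that value (G_SUB), the two shifted halves are OR'd together (G_SHL/G_LSHR/G_OR), and a G_ICMP-eq-0/G_SELECT pair returns the second operand unchanged when the masked amount is zero. A minimal standalone C++ sketch of that per-lane value for G_FSHR on 16-bit elements follows; the helper name ref_fshr16 is ad hoc and illustrative only.

#include <cstdint>

// Illustrative sketch of the value each expanded 16-bit lane above is expected
// to produce for G_FSHR. Not part of the patch; ref_fshr16 is an ad-hoc name.
static uint16_t ref_fshr16(uint16_t X, uint16_t Y, uint16_t Amt) {
  unsigned ShAmt = Amt & 15;   // amount reduced modulo the bit width,
                               // matching the G_AND with the splat of 15
  if (ShAmt == 0)              // guarded by the G_ICMP eq 0 / G_SELECT pair
    return Y;                  // fshr by a multiple of 16 returns Y unchanged
  // Conceptually shift the concatenation X:Y right by ShAmt and keep the low
  // 16 bits: Y >> ShAmt OR'd with X << (16 - ShAmt), matching the
  // G_SUB/G_SHL/G_LSHR/G_OR sequence in the checks.
  return (uint16_t)((X << (16 - ShAmt)) | (Y >> ShAmt));
}

The three check prefixes diverge only in how this is expressed: the SI checks widen each lane to s32 and mask with 65535 before shifting, the VI checks use native s16 shift and subtract operations, and the GFX9 checks keep the work on <2 x s16> vectors assembled with G_BUILD_VECTOR_TRUNC.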