Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -99,9 +99,18 @@
   bool legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B) const;
+
+  void legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+                               Register DstReg, Register Num, Register Den,
+                               bool IsRem) const;
   bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
+  bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           MachineIRBuilder &B) const;
+  bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B) const;
+
   bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
                     MachineIRBuilder &B) const;
   bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1275,6 +1275,9 @@
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_UREM:
     return legalizeUDIV_UREM(MI, MRI, B);
+  case TargetOpcode::G_SDIV:
+  case TargetOpcode::G_SREM:
+    return legalizeSDIV_SREM(MI, MRI, B);
   case TargetOpcode::G_ATOMIC_CMPXCHG:
     return legalizeAtomicCmpXChg(MI, MRI, B);
   case TargetOpcode::G_FLOG:
@@ -2250,19 +2253,14 @@
   return B.buildFPTOUI(S32, Mul).getReg(0);
 }
 
-bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
-                                              MachineRegisterInfo &MRI,
-                                              MachineIRBuilder &B) const {
-  B.setInstr(MI);
-  bool IsRem = MI.getOpcode() == AMDGPU::G_UREM;
-
+void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+                                                  Register DstReg,
+                                                  Register Num,
+                                                  Register Den,
+                                                  bool IsRem) const {
   const LLT S1 = LLT::scalar(1);
   const LLT S32 = LLT::scalar(32);
 
-  Register DstReg = MI.getOperand(0).getReg();
-  Register Num = MI.getOperand(1).getReg();
-  Register Den = MI.getOperand(2).getReg();
-
   // RCP = URECIP(Den) = 2^32 / Den + e
   // e is rounding error.
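+  // The error is corrected below: the rounding error of RCP * Den drives one
+  // refinement of RCP, and the G_UMULH-based quotient then gets a final
+  // correction before the quotient or remainder is written to DstReg.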
   auto RCP = buildDivRCP(B, Den);
@@ -2343,7 +2341,17 @@
   } else {
     B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One);
   }
+}
+
+bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Num = MI.getOperand(1).getReg();
+  Register Den = MI.getOperand(2).getReg();
+  legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsRem);
 
   MI.eraseFromParent();
   return true;
 }
@@ -2356,6 +2364,52 @@
   return false;
 }
 
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  const LLT S32 = LLT::scalar(32);
+
+  const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Take the absolute values: with the sign mask S = X >> 31, (X + S) ^ S is
+  // |X|, so the unsigned 32-bit expansion can perform the actual division.
+  auto ThirtyOne = B.buildConstant(S32, 31);
+  auto LHSign = B.buildAShr(S32, LHS, ThirtyOne);
+  auto RHSign = B.buildAShr(S32, RHS, ThirtyOne);
+
+  LHS = B.buildAdd(S32, LHS, LHSign).getReg(0);
+  RHS = B.buildAdd(S32, RHS, RHSign).getReg(0);
+
+  LHS = B.buildXor(S32, LHS, LHSign).getReg(0);
+  RHS = B.buildXor(S32, RHS, RHSign).getReg(0);
+
+  Register UDivRem = MRI.createGenericVirtualRegister(S32);
+  legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsRem);
+
+  // Restore the sign with (R ^ S) - S: the remainder takes the dividend's
+  // sign, and the quotient is negative iff the operand signs differ.
+  if (IsRem) {
+    auto RSign = LHSign; // Remainder sign is the same as LHS
+    UDivRem = B.buildXor(S32, UDivRem, RSign).getReg(0);
+    B.buildSub(DstReg, UDivRem, RSign);
+  } else {
+    auto DSign = B.buildXor(S32, LHSign, RHSign);
+    UDivRem = B.buildXor(S32, UDivRem, DSign).getReg(0);
+    B.buildSub(DstReg, UDivRem, DSign);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
+                                            MachineRegisterInfo &MRI,
+                                            MachineIRBuilder &B) const {
+  if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32))
+    return legalizeSDIV_SREM32(MI, MRI, B);
+  return false;
+}
+
 bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -12,18 +12,123 @@
     ; GFX6-LABEL: name: test_sdiv_s32
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0 = COPY [[SDIV]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]],
[[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: $vgpr0 = COPY [[SUB4]](s32) ; GFX8-LABEL: name: test_sdiv_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[SDIV]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + 
; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: $vgpr0 = COPY [[SUB4]](s32) ; GFX9-LABEL: name: test_sdiv_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SDIV]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SDIV %0, %1 @@ -41,27 +146,228 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]] - ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = 
G_SUB [[C2]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_sdiv_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]] - ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH 
[[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_sdiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; 
GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]] - ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX9: 
[[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -144,36 +450,138 @@ ; GFX6-LABEL: name: test_sdiv_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; 
GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -192,39 +600,280 @@ ; GFX6-LABEL: name: test_sdiv_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX6: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]] - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = 
G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX6: 
[[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]] - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] 
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR 
[[SELECT7]], [[XOR6]] + ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]] - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL 
[[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SDIV %0, %1 @@ -240,36 +889,138 @@ ; GFX6-LABEL: name: test_sdiv_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: $vgpr0 = COPY 
[[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX9: 
[[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -288,36 +1039,138 @@ ; GFX6-LABEL: name: test_sdiv_s17 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX6: 
[[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) 
= G_ANYEXT [[TRUNC2]](s17) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX9: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -12,18 +12,117 @@ ; GFX6-LABEL: name: test_srem_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[SREM]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; 
GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: $vgpr0 = COPY [[SUB4]](s32) ; GFX8-LABEL: name: test_srem_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[SREM]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: $vgpr0 = COPY [[SUB4]](s32) ; GFX9-LABEL: name: test_srem_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SREM]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SREM %0, %1 @@ -41,27 
+140,219 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], [[UV2]] - ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX6: 
[[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_srem_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], [[UV2]] - ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: 
[[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_srem_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], 
[[UV2]] - ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH 
[[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -144,36 +435,138 @@ ; GFX6-LABEL: name: test_srem_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) - ; GFX6: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX6: $vgpr0 = COPY [[AND1]](s32) ; GFX8-LABEL: name: test_srem_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) - ; GFX8: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: 
[[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX8: $vgpr0 = COPY [[AND1]](s32) ; GFX9-LABEL: name: test_srem_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) - ; GFX9: $vgpr0 = COPY [[ZEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; 
GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: $vgpr0 = COPY [[AND1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -192,39 +585,271 @@ ; GFX6-LABEL: name: test_srem_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX6: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; 
GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX8: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: 
[[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY 
$vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) - ; GFX9: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) - ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = 
G_SUB [[XOR2]], [[ASHR]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 + ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SREM %0, %1 @@ -240,36 +865,132 @@ ; GFX6-LABEL: name: test_srem_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: 
[[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_srem_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: 
[[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_srem_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: 
[[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -288,36 +1009,132 @@ ; GFX6-LABEL: name: test_srem_s17 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_srem_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_srem_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]] + ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD 
[[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -0,0 +1,1075 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. 
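(Editor's note, not part of the patch: as a reading aid for the checks that follow, the signed path is a thin wrapper around the unsigned expansion: build a sign mask for each operand, take absolute values with add/xor, run the unsigned reciprocal-based division, then restore the quotient sign from the xor of the two sign masks. Below is a minimal standalone C++ sketch of that wrapper under those assumptions; the function name is made up, and the plain `/` stands in for the unsigned reciprocal sequence, so this is an illustration of the idea, not the patch's code.)

#include <cstdint>
#include <cstdio>

// Sketch of 32-bit signed division implemented on top of unsigned division.
static int32_t sdiv32_via_udiv(int32_t Num, int32_t Den) {
  // Sign masks: all ones if negative, zero otherwise. This stands in for the
  // ashr-by-31 sign mask used by the expansion.
  uint32_t NumSign = Num < 0 ? ~0u : 0u;
  uint32_t DenSign = Den < 0 ? ~0u : 0u;

  // |x| computed branchlessly as (x + sign) ^ sign, in unsigned arithmetic so
  // INT32_MIN does not overflow.
  uint32_t UNum = ((uint32_t)Num + NumSign) ^ NumSign;
  uint32_t UDen = ((uint32_t)Den + DenSign) ^ DenSign;

  // Unsigned division; the real expansion replaces this with the
  // reciprocal-estimate sequence instead of a hardware divide.
  uint32_t UQuot = UNum / UDen;

  // The quotient is negative iff the operand signs differ.
  uint32_t QuotSign = NumSign ^ DenSign;
  return (int32_t)((UQuot ^ QuotSign) - QuotSign);
}

int main() {
  printf("%d\n", sdiv32_via_udiv(-7, 2));  // -3 (truncates toward zero)
  printf("%d\n", sdiv32_via_udiv(7, -2));  // -3
  printf("%d\n", sdiv32_via_udiv(-7, -2)); //  3
  return 0;
}

(For an srem-style remainder the same structure applies, except the result sign is taken from the numerator's sign mask alone rather than the xor of both.)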
+ +define i32 @v_sdiv_i32(i32 %num, i32 %den) { +; GISEL-LABEL: v_sdiv_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: v_xor_b32_e32 v1, v2, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1 +; CGP-NEXT: v_xor_b32_e32 v4, v2, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_xor_b32_e32 v5, v0, v2 +; CGP-NEXT: v_xor_b32_e32 v3, v1, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v5 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v7, v3 +; CGP-NEXT: v_mul_lo_u32 v8, v7, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v7, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v7 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v8, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v7, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v7, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v5 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v5 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: 
v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i32 %num, %den + ret i32 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. +declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) { +; GISEL-LABEL: s_sdiv_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_ashr_i32 s2, s0, 31 +; GISEL-NEXT: s_add_i32 s0, s0, s2 +; GISEL-NEXT: s_add_i32 s1, s1, s2 +; GISEL-NEXT: s_xor_b32 s3, s0, s2 +; GISEL-NEXT: s_xor_b32 s4, s1, s2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, s4 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 +; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 +; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v0, s3 +; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, s3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, s3, v1 +; GISEL-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v4 +; GISEL-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: s_xor_b32 s0, s2, s2 +; GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 +; GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-NEXT: ; return to shader part epilog +; +; CGP-LABEL: s_sdiv_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_ashr_i32 s2, s0, 31 +; CGP-NEXT: s_ashr_i32 s3, s1, 31 +; CGP-NEXT: s_xor_b32 s5, s2, s3 +; CGP-NEXT: s_add_i32 s0, s0, s2 +; CGP-NEXT: s_add_i32 s1, s1, s3 +; CGP-NEXT: s_xor_b32 s2, s0, s2 +; CGP-NEXT: s_xor_b32 s4, s1, s3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, s4 +; CGP-NEXT: s_bfe_u64 s[0:1], s[4:5], 0x200000 +; CGP-NEXT: s_bfe_u64 s[6:7], s[2:3], 0x200000 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, 0, s0 +; CGP-NEXT: v_mul_lo_u32 v3, 0, s6 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v4, s0 +; CGP-NEXT: v_mul_lo_u32 v2, v4, s1 +; CGP-NEXT: v_mul_hi_u32 v5, v4, s0 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e64 v2, s[0:1], v6, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[0:1], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[0:1], v4, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[0:1], v4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, s6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, s7 +; CGP-NEXT: v_mul_hi_u32 v1, v1, s6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v1, v0, s4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v3, 
vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, s2, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, s2, v1 +; CGP-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v4 +; CGP-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v0, s5, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s5, v0 +; CGP-NEXT: v_readfirstlane_b32 s0, v0 +; CGP-NEXT: ; return to shader part epilog + %result = sdiv i32 %num, %den + %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result) + ret i32 %readlane +} + +define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) { +; GISEL-LABEL: v_sdiv_v2i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v4, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_xor_b32_e32 v7, v5, v5 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v5 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v8 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v9 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v9, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v12, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v6 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: 
v_sdiv_v2i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_xor_b32_e32 v8, v4, v5 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; CGP-NEXT: v_xor_b32_e32 v9, v6, v7 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_xor_b32_e32 v10, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v11, v2, v5 +; CGP-NEXT: v_xor_b32_e32 v6, v1, v6 +; CGP-NEXT: v_xor_b32_e32 v7, v3, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v11 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v11 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v10 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v7 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v13, 0, v6 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v14, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v14, v11 +; CGP-NEXT: v_mul_lo_u32 v2, v15, v7 +; CGP-NEXT: v_mul_lo_u32 v17, v15, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v15, v7 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_sub_i32_e32 v19, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v18, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v19, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v15 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v15 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v16, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v17, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v14, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v14, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v15, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v15, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v6 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v2, v0, v11 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v1, v7 +; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v1 +; CGP-NEXT: v_subrev_i32_e32 v13, vcc, 1, v1 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, v10, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v6, v5 +; CGP-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v6, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v7 +; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7] +; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v12, s[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v8 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v9 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i32> %num, %den + ret <2 x i32> %result +} + +define i32 @v_sdiv_i32_pow2k_denom(i32 %num) { +; CHECK-LABEL: v_sdiv_i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x1000, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_xor_b32_e32 v2, v2, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v2 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i32 %num, 4096 + ret i32 %result +} + +define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) { +; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, s4, v2 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_xor_b32_e32 v7, v4, v4 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v2, v5, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v2 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 +; 
CHECK-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; CHECK-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v8, v8, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v9, v5 +; CHECK-NEXT: v_add_i32_e64 v10, s[6:7], v4, v8 +; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v8 +; CHECK-NEXT: v_add_i32_e64 v8, s[6:7], v5, v9 +; CHECK-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 1, v5 +; CHECK-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 +; CHECK-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v9, v4, s[6:7] +; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v12, v5, s[6:7] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @v_sdiv_i32_oddk_denom(i32 %num) { +; CHECK-LABEL: v_sdiv_i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x12d8fb, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_xor_b32_e32 v2, v2, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v2 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i32 %num, 1235195 + ret i32 %result +} + +define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) { +; CHECK-LABEL: 
v_sdiv_v2i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, s4, v2 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_xor_b32_e32 v7, v4, v4 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v2, v5, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2 +; CHECK-NEXT: v_mul_hi_u32 v9, v4, v2 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 +; CHECK-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; CHECK-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v8, v8, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v9, v5 +; CHECK-NEXT: v_add_i32_e64 v10, s[6:7], v4, v8 +; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v8 +; CHECK-NEXT: v_add_i32_e64 v8, s[6:7], v5, v9 +; CHECK-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v0 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 1, v5 +; CHECK-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 +; CHECK-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v9, v4, s[6:7] +; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v12, v5, s[6:7] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) { +; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; 
CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_xor_b32_e32 v1, v2, v2 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i32 4096, %y + %r = sdiv i32 %x, %shl.y + ret i32 %r +} + +define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v4, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v7, v5, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v5 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v8 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v9 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 +; GISEL-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v8 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v9, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v12, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v6 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2 +; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2 +; CGP-NEXT: v_xor_b32_e32 v7, v0, v4 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CGP-NEXT: v_xor_b32_e32 v8, v1, v5 +; CGP-NEXT: v_xor_b32_e32 v9, v4, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v6 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v7 +; CGP-NEXT: v_xor_b32_e32 v11, v5, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v0 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v8 +; CGP-NEXT: v_xor_b32_e32 v6, v1, v6 +; CGP-NEXT: v_xor_b32_e32 v13, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v6 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v6 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v13 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v13 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v14, v6 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v14, v6 +; CGP-NEXT: v_mul_lo_u32 v2, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v15, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v15, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_sub_i32_e32 v19, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v18, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v19, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v15 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v15 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v16, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v17, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v14, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v14, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v15, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v15, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; CGP-NEXT: 
v_mul_lo_u32 v0, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v8 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v2, v0, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v1, v13 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v1 +; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 1, v1 +; CGP-NEXT: v_sub_i32_e32 v14, vcc, v7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v13 +; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7] +; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v10, s[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v11 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i32> , %y + %r = sdiv <2 x i32> %x, %shl.y + ret <2 x i32> %r +} + +define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) { +; GISEL-LABEL: v_sdiv_i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; GISEL-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v1 +; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: v_xor_b32_e32 v1, v2, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v3, s4, v0 +; CGP-NEXT: v_and_b32_e32 v4, s4, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v5, 
0, v3 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v6 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v6, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v3 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v5, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v3, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i32 %num, 16777215 + %den.mask = and i32 %den, 16777215 + %result = sdiv i32 %num.mask, %den.mask + ret i32 %result +} + +define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +; GISEL-LABEL: v_sdiv_v2i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_and_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v4, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_xor_b32_e32 v7, v5, v5 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v5 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v8 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v8 +; GISEL-NEXT: 
v_add_i32_e64 v8, s[6:7], v5, v9 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v8, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v9, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v12, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v6 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v7, s4, v0 +; CGP-NEXT: v_and_b32_e32 v8, s4, v1 +; CGP-NEXT: v_and_b32_e32 v9, s4, v2 +; CGP-NEXT: v_and_b32_e32 v10, s4, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v9 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v10 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v8 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v2, v14, v10 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v14, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[2:3], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v14 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v14 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v15, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v13, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v14, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v14, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, 
v2, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5]
+; CGP-NEXT: v_mul_lo_u32 v0, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v3, v2, 0
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v2, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v5, v1, 0
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v8
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v3
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v5
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32
+; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32
+; CGP-NEXT: v_mul_lo_u32 v2, v0, v9
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 1, v0
+; CGP-NEXT: v_mul_lo_u32 v5, v1, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v1
+; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 1, v1
+; CGP-NEXT: v_sub_i32_e32 v12, vcc, v7, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v2
+; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v8, v5
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5
+; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v9
+; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v10
+; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7]
+; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[6:7]
+; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; CGP-NEXT: s_setpc_b64 s[30:31]
+ %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
+ %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
+ %result = sdiv <2 x i32> %num.mask, %den.mask
+ ret <2 x i32> %result
+}
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -0,0 +1,1053 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+ +define i32 @v_srem_i32(i32 %num, i32 %den) { +; GISEL-LABEL: v_srem_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_xor_b32_e32 v4, v0, v3 +; CGP-NEXT: v_xor_b32_e32 v5, v1, v2 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, v7, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v7, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v7 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v8, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v7, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v7, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v5 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v4, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v1, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v5 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v3 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; 
CGP-NEXT: s_setpc_b64 s[30:31] + %result = srem i32 %num, %den + ret i32 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. +declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) { +; GISEL-LABEL: s_srem_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_ashr_i32 s4, s0, 31 +; GISEL-NEXT: s_add_i32 s0, s0, s4 +; GISEL-NEXT: s_add_i32 s1, s1, s4 +; GISEL-NEXT: s_xor_b32 s2, s0, s4 +; GISEL-NEXT: s_xor_b32 s3, s1, s4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GISEL-NEXT: v_mul_lo_u32 v1, v0, s3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, s3 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 +; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 +; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v0, s2 +; GISEL-NEXT: v_mul_lo_u32 v0, v0, s3 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s2, v0 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], s3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[0:1], s2, v0 +; GISEL-NEXT: v_subrev_i32_e64 v0, s[2:3], s3, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GISEL-NEXT: v_xor_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 +; GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-NEXT: ; return to shader part epilog +; +; CGP-LABEL: s_srem_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_ashr_i32 s5, s0, 31 +; CGP-NEXT: s_ashr_i32 s3, s1, 31 +; CGP-NEXT: s_add_i32 s0, s0, s5 +; CGP-NEXT: s_add_i32 s1, s1, s3 +; CGP-NEXT: s_xor_b32 s2, s0, s5 +; CGP-NEXT: s_xor_b32 s4, s1, s3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, s4 +; CGP-NEXT: s_bfe_u64 s[0:1], s[4:5], 0x200000 +; CGP-NEXT: s_bfe_u64 s[6:7], s[2:3], 0x200000 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, 0, s0 +; CGP-NEXT: v_mul_lo_u32 v3, 0, s6 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v4, s0 +; CGP-NEXT: v_mul_lo_u32 v2, v4, s1 +; CGP-NEXT: v_mul_hi_u32 v5, v4, s0 +; CGP-NEXT: v_mul_lo_u32 v6, 0, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e64 v2, s[0:1], v6, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[0:1], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[0:1], v4, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[0:1], v4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, s6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, s7 +; CGP-NEXT: v_mul_hi_u32 v1, v1, s6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, s4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, s2, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; CGP-NEXT: v_add_i32_e64 v2, s[0:1], s4, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[0:1], s2, v0 +; CGP-NEXT: v_subrev_i32_e64 v0, 
s[2:3], s4, v1 +; CGP-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CGP-NEXT: v_xor_b32_e32 v0, s5, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s5, v0 +; CGP-NEXT: v_readfirstlane_b32 s0, v0 +; CGP-NEXT: ; return to shader part epilog + %result = srem i32 %num, %den + %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result) + ret i32 %readlane +} + +define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) { +; GISEL-LABEL: v_srem_v2i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 +; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 +; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v2 +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; 
CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_xor_b32_e32 v8, v0, v6 +; CGP-NEXT: v_xor_b32_e32 v9, v2, v4 +; CGP-NEXT: v_xor_b32_e32 v10, v1, v7 +; CGP-NEXT: v_xor_b32_e32 v11, v3, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v9 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v8 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v11 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v11 +; CGP-NEXT: v_mul_lo_u32 v13, 0, v10 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v14, v9 +; CGP-NEXT: v_mul_lo_u32 v2, v15, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v15, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v15, v11 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_sub_i32_e32 v19, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v18, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v19, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v15 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v15 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v16, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v17, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v14, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v14, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v15, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v15, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v10 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v11 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v0 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v2, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v11 +; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v3, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v10, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v3, v11 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, 
vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i32> %num, %den + ret <2 x i32> %result +} + +define i32 @v_srem_i32_pow2k_denom(i32 %num) { +; CHECK-LABEL: v_srem_i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x1000, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_xor_b32_e32 v2, v2, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem i32 %num, 4096 + ret i32 %result +} + +define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) { +; CHECK-LABEL: v_srem_v2i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, s4, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v5, v5, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v5 +; CHECK-NEXT: v_cvt_f32_u32_e32 v7, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CHECK-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; CHECK-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v9, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; CHECK-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v8, v8, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v9, v7 +; CHECK-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 +; CHECK-NEXT: 
v_sub_i32_e64 v6, s[6:7], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 +; CHECK-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v8, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[8:9], v9, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 +; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @v_srem_i32_oddk_denom(i32 %num) { +; CHECK-LABEL: v_srem_i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0x12d8fb, v1 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_xor_b32_e32 v2, v2, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem i32 %num, 1235195 + ret i32 %result +} + +define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) { +; CHECK-LABEL: v_srem_v2i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, s4, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_xor_b32_e32 v0, 
v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v5, v5, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v5 +; CHECK-NEXT: v_cvt_f32_u32_e32 v7, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CHECK-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; CHECK-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CHECK-NEXT: v_mul_lo_u32 v8, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v9, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; CHECK-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v8, v8, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v9, v7 +; CHECK-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 +; CHECK-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 +; CHECK-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v0 +; CHECK-NEXT: v_mul_hi_u32 v7, v7, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v0, v6 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v8, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[8:9], v9, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 +; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CHECK-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) { +; CHECK-LABEL: v_srem_i32_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 +; 
CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i32 4096, %y + %r = srem i32 %x, %shl.y + ret i32 %r +} + +define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 +; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2 +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 +; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 +; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 +; 
GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i32_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2 +; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v2 +; CGP-NEXT: v_xor_b32_e32 v8, v0, v6 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CGP-NEXT: v_xor_b32_e32 v9, v1, v7 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v4 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v0 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v9 +; CGP-NEXT: v_xor_b32_e32 v12, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v13, v2, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v12 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v12 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v13 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v13 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v2, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v15, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v15, v13 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v17 +; CGP-NEXT: v_sub_i32_e32 v19, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v18, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v19, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v14 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v15 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v15 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v16, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v17, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v14, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v14, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v15, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v15, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v9 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v14, v1, v9 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v12 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v13 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v0 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v9, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v12 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v2, v12 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CGP-NEXT: 
v_sub_i32_e64 v0, s[6:7], v2, v12 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v13 +; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v3, v13 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v9, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v3, v13 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i32> , %y + %r = srem <2 x i32> %x, %shl.y + ret <2 x i32> %r +} + +define i32 @v_srem_i32_24bit(i32 %num, i32 %den) { +; GISEL-LABEL: v_srem_i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 +; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v3, s4, v0 +; CGP-NEXT: v_and_b32_e32 v4, s4, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v3 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v6 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v0 +; CGP-NEXT: v_sub_i32_e64 v0, 
s[4:5], v6, v0
+; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
+; CGP-NEXT: v_mul_lo_u32 v0, v1, v3
+; CGP-NEXT: v_mul_lo_u32 v2, v1, 0
+; CGP-NEXT: v_mul_hi_u32 v1, v1, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32
+; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v3, v0
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v1, v4
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v0
+; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4
+; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
+; CGP-NEXT: s_setpc_b64 s[30:31]
+  %num.mask = and i32 %num, 16777215
+  %den.mask = and i32 %den, 16777215
+  %result = srem i32 %num.mask, %den.mask
+  ret i32 %result
+}
+
+define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
+; GISEL-LABEL: v_srem_v2i32_24bit:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 s4, 0xffffff
+; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
+; GISEL-NEXT: v_and_b32_e32 v3, s4, v3
+; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7
+; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8
+; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9
+; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7
+; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3
+; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32_24bit:
+; CGP: ; %bb.0:
+; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT: s_mov_b32 s4, 0xffffff
+; CGP-NEXT: v_and_b32_e32 v7, s4, v0
+; CGP-NEXT: v_and_b32_e32 v8, s4, v1
+; CGP-NEXT: v_and_b32_e32 v9, s4, v2
+; CGP-NEXT: v_and_b32_e32 v10, s4, v3
+; CGP-NEXT: v_cvt_f32_u32_e32 v0, v9
+; CGP-NEXT: v_mul_lo_u32 v1, 0, v9
+; CGP-NEXT: v_mul_lo_u32 v11, 0, v7
+; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
+; CGP-NEXT: v_mul_lo_u32 v3, 0, v10
+; CGP-NEXT: v_mul_lo_u32 v12, 0, v8
+; CGP-NEXT: v_rcp_f32_e32 v0, v0
+; CGP-NEXT: v_rcp_f32_e32 v2, v2
+; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2
+; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0
+; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2
+; CGP-NEXT: v_mul_lo_u32 v0, v13, v9
+; CGP-NEXT: v_mul_lo_u32 v15, v13, 0
+; CGP-NEXT: v_mul_hi_u32 v4, v13, v9
+; CGP-NEXT: v_mul_lo_u32 v2, v14, v10
+; CGP-NEXT: v_mul_lo_u32 v16, v14, 0
+; CGP-NEXT: v_mul_hi_u32 v5, v14, v10
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15
+; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v0
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16
+; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v2
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32
+; CGP-NEXT: v_lshr_b64 v[5:6], v[2:3], 32
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v17, vcc
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v18, s[4:5]
+; CGP-NEXT: v_mul_lo_u32 v0, v1, v13
+; CGP-NEXT: v_mul_lo_u32 v4, v1, 0
+; CGP-NEXT: v_mul_hi_u32 v1, v1, v13
+; CGP-NEXT: v_mul_lo_u32 v2, v3, v14
+; CGP-NEXT: v_mul_lo_u32 v5, v3, 0
+; CGP-NEXT: v_mul_hi_u32 v3, v3, v14
+; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v15, v4
+; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5
+; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1
+; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3
+; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32
+; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32
+; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v13, v0
+; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v13, v0
+; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v14, v1
+; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v14, v1
+; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5]
+; CGP-NEXT: v_mul_lo_u32 v0, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v3, v2, 0
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v2, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v5, v1, 0
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v8
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v3
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v5
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32
+; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32
+; CGP-NEXT: v_mul_lo_u32 v0, v0, v9
+; CGP-NEXT: v_mul_lo_u32 v1, v1, v10
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v0
+; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v1
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9
+; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v2, v9
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v0
+; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v9
+; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v10
+; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v3, v10
+; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v8, v1
+; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v3, v10
+; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CGP-NEXT: s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
+  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
+  %result = srem <2 x i32> %num.mask, %den.mask
+  ret <2 x i32> %result
+}
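
Note (illustrative, not part of the patch): the GISEL checks above exercise the usual sign-fixup expansion of srem in terms of an unsigned remainder. Both operands are replaced by their absolute values with an arithmetic shift, add, and xor; the unsigned remainder is computed; and the numerator's sign is reapplied with a final xor/sub. Below is a minimal sketch in plain C of that identity, assuming two's-complement arithmetic right shifts; urem32 and the other names are placeholders for whatever unsigned expansion is used (the reciprocal-based one in the checks above), not functions from the patch.

#include <stdint.h>

/* Placeholder for the unsigned remainder expansion. */
static uint32_t urem32(uint32_t n, uint32_t d) { return n % d; }

/* Sign-fixup expansion of 32-bit srem, mirroring the ashr/add/xor ...
 * xor/sub sequence in the checks: the remainder takes the numerator's sign. */
static int32_t srem32_via_urem(int32_t num, int32_t den) {
  int32_t sign_n = num >> 31;   /* 0 if num >= 0, -1 otherwise (arithmetic shift assumed) */
  int32_t sign_d = den >> 31;
  uint32_t abs_n = ((uint32_t)num + (uint32_t)sign_n) ^ (uint32_t)sign_n;  /* |num| */
  uint32_t abs_d = ((uint32_t)den + (uint32_t)sign_d) ^ (uint32_t)sign_d;  /* |den| */
  uint32_t r = urem32(abs_n, abs_d);
  return ((int32_t)r ^ sign_n) - sign_n;  /* reapply the numerator's sign */
}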