Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -89,6 +89,8 @@
 
   bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
+  bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      MachineIRBuilder &B) const;
 
   bool loadInputValue(Register DstReg, MachineIRBuilder &B,
                       const ArgDescriptor *Arg,
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1628,6 +1628,9 @@
     .clampScalar(0, S32, S64)
     .lower();
 
+  getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+    .custom();
+
   // TODO: Only Try to form v2s16 with legal packed instructions.
   getActionDefinitionsBuilder(G_FSHR)
     .legalFor({{S32, S32}})
@@ -1760,6 +1763,9 @@
     return legalizeFFloor(MI, MRI, B);
   case TargetOpcode::G_BUILD_VECTOR:
     return legalizeBuildVector(MI, MRI, B);
+  case TargetOpcode::G_ROTL:
+  case TargetOpcode::G_ROTR:
+    return legalizeRotate(MI, MRI, B);
   default:
     return false;
   }
@@ -2745,6 +2751,35 @@
   return true;
 }
 
+// Lower G_ROTL / G_ROTR to a funnel shift whose two data inputs are the same
+// value: rotr(x, n) == fshr(x, x, n) and rotl(x, n) == fshl(x, x, n).
+// G_FSHR is the default; for power-of-two widths a left rotate by n equals a
+// right rotate by -n, so G_ROTL becomes G_FSHR with an integer-negated amount.
+bool AMDGPULegalizerInfo::legalizeRotate(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register Amt = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  unsigned Opc = TargetOpcode::G_FSHR;
+
+  if (MI.getOpcode() == TargetOpcode::G_ROTL) {
+    if (isPowerOf2_64(DstTy.getScalarSizeInBits())) {
+      // The amount is an integer, so it needs an integer negate (0 - Amt);
+      // G_FNEG is a floating-point sign-bit flip and does not compute -Amt.
+      // Negate in the amount's own type, which may differ from DstTy.
+      Amt = B.buildNeg(MRI.getType(Amt), Amt).getReg(0);
+    } else {
+      // The negation identity does not hold modulo a non-power-of-two width.
+      Opc = TargetOpcode::G_FSHL;
+    }
+  }
+
+  B.buildInstr(Opc, {Dst}, {Src, Src, Amt});
+  MI.eraseFromParent();
+  return true;
+}
+
 // Check that this is a G_XOR x, -1
 static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
   if
(MI.getOpcode() != TargetOpcode::G_XOR) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir @@ -0,0 +1,505 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX6 +# RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX8 + +--- +name: rotl_i15 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX6-LABEL: name: rotl_i15 + ; GFX6: liveins: $sgpr0, $sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: 
[[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) + ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[AND5]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY12]], [[COPY13]] + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: $sgpr0 = COPY [[COPY14]](s32) + ; GFX8-LABEL: name: rotl_i15 + 
; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) + ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC]] + ; GFX8: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC2]](s16) + ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C6]] + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC3]](s16) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C6]] + ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND5]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND6]], [[TRUNC4]](s16) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: $sgpr0 = COPY [[COPY7]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s15) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s15) = G_TRUNC %3(s32) + %5:_(s15) = G_ROTL %0, %1(s15) + %4:_(s32) = G_ANYEXT %5(s15) + $sgpr0 = COPY %4 + +... 
+--- +name: rotl_i16 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX6-LABEL: name: rotl_i16 + ; GFX6: liveins: $sgpr0, $sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[FNEG]], [[C]] + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX6: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[FNEG]], [[C1]] + ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[ZEXT]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: rotl_i16 + ; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] + ; GFX8: [[C:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 15 + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[FNEG]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[FNEG]], [[C1]] + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC %3(s32) + %5:_(s16) = G_ROTL %0, %1(s16) + %4:_(s32) = G_ANYEXT %5(s16) + $sgpr0 = COPY %4 + +... +--- +name: rotl_i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotl_i32 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]] + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[FNEG]](s32) + ; GFX: $sgpr0 = COPY [[FSHR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ROTL %0, %1(s32) + $sgpr0 = COPY %2 + +... 
+--- +name: rotl_i31 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotl_i31 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[AND1]] + ; GFX: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], 
[[SELECT]] + ; GFX: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] + ; GFX: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; GFX: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) + ; GFX: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; GFX: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; GFX: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; GFX: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[AND5]](s32) + ; GFX: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY12]], [[COPY13]] + ; GFX: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX: $sgpr0 = COPY [[COPY14]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s31) = G_TRUNC %0(s32) + %3:_(s31) = G_TRUNC %1(s32) + %4:_(s31) = G_ROTL %2, %3(s31) + %5:_(s32) = G_ANYEXT %4(s31) + $sgpr0 = COPY %5 + +... 
+--- +name: rotl_i64 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; GFX-LABEL: name: rotl_i64 + ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 + ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 + ; GFX: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[FNEG]], [[C]] + ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[FNEG]], [[C1]] + ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32) + ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ROTL %0, %1(s64) + $sgpr0_sgpr1 = COPY %2 + +... 
+--- +name: rotl_v4i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + + ; GFX-LABEL: name: rotl_v4i32 + ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UV]] + ; GFX: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[UV1]] + ; GFX: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[UV2]] + ; GFX: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[UV3]] + ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV4]], [[UV8]], [[FNEG]](s32) + ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV5]], [[UV9]], [[FNEG1]](s32) + ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV6]], [[UV10]], [[FNEG2]](s32) + ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV7]], [[UV11]], [[FNEG3]](s32) + ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) + ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + +... 
+--- +name: rotr_i16 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX6-LABEL: name: rotr_i16 + ; GFX6: liveins: $sgpr0, $sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX6: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] + ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[ZEXT]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: rotr_i16 + ; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s16) 
= G_CONSTANT i16 -1 + ; GFX8: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C1]] + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC %3(s32) + %5:_(s16) = G_ROTR %0, %1(s16) + %4:_(s32) = G_ANYEXT %5(s16) + $sgpr0 = COPY %4 + +... +--- +name: rotr_i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotr_i32 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](s32) + ; GFX: $sgpr0 = COPY [[FSHR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ROTR %0, %1(s32) + $sgpr0 = COPY %2 + +... 
+--- +name: rotr_i64 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; GFX-LABEL: name: rotr_i64 + ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 + ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 + ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY1]], [[C1]] + ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32) + ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ROTR %0, %1(s64) + $sgpr0_sgpr1 = COPY %2 + +... 
+--- +name: rotr_v4i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + + ; GFX-LABEL: name: rotr_v4i32 + ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV4]], [[UV8]](s32) + ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV5]], [[UV9]](s32) + ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV6]], [[UV10]](s32) + ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV7]], [[UV11]](s32) + ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) + ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + +...