diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1537,6 +1537,14 @@
     return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
   }
 
+  /// Build and insert integer negation
+  /// Zero = G_CONSTANT 0
+  /// \p Dst = G_SUB Zero, \p Src0
+  MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) {
+    auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0);
+    return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0});
+  }
+
   /// Build and insert \p Res = G_CTPOP \p Op0, \p Src0
   MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
     return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4434,6 +4434,8 @@
   case G_FMAXIMUM:
   case G_FSHL:
   case G_FSHR:
+  case G_ROTL:
+  case G_ROTR:
   case G_FREEZE:
   case G_SADDSAT:
   case G_SSUBSAT:
@@ -6079,6 +6081,29 @@
       isPowerOf2_32(EltSizeInBits))
     return lowerRotateWithReverseRotate(MI);
 
+  // If a funnel shift is supported, use it.
+  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+  bool IsFShLegal = false;
+  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+                                Register R3) {
+      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+      MI.eraseFromParent();
+      return Legalized;
+    };
+    // Prefer the funnel shift in the same direction as the rotate.
+    if (IsFShLegal)
+      return buildFunnelShift(FShOpc, Dst, Src, Amt);
+    // Otherwise use the funnel shift in the other direction with the negated
+    // amount, built in the amount's type; only for power-of-2 element sizes.
+    if (isPowerOf2_32(EltSizeInBits)) {
+      Amt = MIRBuilder.buildNeg(AmtTy, Amt).getReg(0);
+      return buildFunnelShift(RevFsh, Dst, Src, Amt);
+    }
+  }
+
   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1626,6 +1626,10 @@
     .clampScalar(0, S32, S64)
     .lower();
 
+  getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+    .scalarize(0)
+    .lower();
+
   // TODO: Only Try to form v2s16 with legal packed instructions.
   getActionDefinitionsBuilder(G_FSHR)
     .legalFor({{S32, S32}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
@@ -0,0 +1,462 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX6
+# RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX8
+
+---
+name: rotl_i15
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotl_i15
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
+    ; GFX6:
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) 
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; GFX6: $sgpr0 = COPY [[OR]](s32) + ; GFX8-LABEL: name: rotl_i15 + ; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]] + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; 
GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32) + ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] + ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[TRUNC3]](s16) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C6]] + ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; GFX8: $sgpr0 = COPY [[OR]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s15) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s15) = G_TRUNC %3(s32) + %5:_(s15) = G_ROTL %0, %1(s15) + %4:_(s32) = G_ANYEXT %5(s15) + $sgpr0 = COPY %4 + +... 
+--- +name: rotl_i16 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX6-LABEL: name: rotl_i16 + ; GFX6: liveins: $sgpr0, $sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: rotl_i16 + ; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] + ; GFX8: 
[[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC %3(s32) + %5:_(s16) = G_ROTL %0, %1(s16) + %4:_(s32) = G_ANYEXT %5(s16) + $sgpr0 = COPY %4 + +... +--- +name: rotl_i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotl_i32 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[SUB]](s32) + ; GFX: $sgpr0 = COPY [[FSHR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ROTL %0, %1(s32) + $sgpr0 = COPY %2 + +... 
+--- +name: rotl_i31 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotl_i31 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32) + ; GFX: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]] + ; GFX: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]] + ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]] + ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]] + ; GFX: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]] + ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]] + ; GFX: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[COPY]], [[AND1]](s32) + ; GFX: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32) + ; GFX: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; GFX: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; GFX: $sgpr0 = COPY [[OR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s31) = G_TRUNC %0(s32) + %3:_(s31) = G_TRUNC %1(s32) + %4:_(s31) = G_ROTL %2, %3(s31) + %5:_(s32) = G_ANYEXT %4(s31) + $sgpr0 = COPY %5 + +... +--- +name: rotl_i64 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; GFX-LABEL: name: rotl_i64 + ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 + ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 + ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] + ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32) + ; GFX: 
[[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ROTL %0, %1(s64) + $sgpr0_sgpr1 = COPY %2 + +... +--- +name: rotl_v4i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + + ; GFX-LABEL: name: rotl_v4i32 + ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV4]] + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[SUB]](s32) + ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV5]] + ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[SUB1]](s32) + ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV6]] + ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[SUB2]](s32) + ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV7]] + ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[SUB3]](s32) + ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) + ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + +... 
+--- +name: rotr_i16 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX6-LABEL: name: rotr_i16 + ; GFX6: liveins: $sgpr0, $sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT1]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: rotr_i16 + ; GFX8: liveins: $sgpr0, $sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) + ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] + ; GFX8: 
[[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16) + ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32) + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC %3(s32) + %5:_(s16) = G_ROTR %0, %1(s16) + %4:_(s32) = G_ANYEXT %5(s16) + $sgpr0 = COPY %4 + +... +--- +name: rotr_i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX-LABEL: name: rotr_i32 + ; GFX: liveins: $sgpr0, $sgpr1 + ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](s32) + ; GFX: $sgpr0 = COPY [[FSHR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ROTR %0, %1(s32) + $sgpr0 = COPY %2 + +... +--- +name: rotr_i64 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; GFX-LABEL: name: rotr_i64 + ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1 + ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3 + ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]] + ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], 
[[TRUNC]](s32) + ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]] + ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC1]](s32) + ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] + ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ROTR %0, %1(s64) + $sgpr0_sgpr1 = COPY %2 + +... +--- +name: rotr_v4i32 +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + + ; GFX-LABEL: name: rotr_v4i32 + ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[UV4]](s32) + ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[UV5]](s32) + ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[UV6]](s32) + ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[UV7]](s32) + ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32) + ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>) + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2 + +...