# Patch overview (text before the first "Index:" header is ignored by patch tools):
#   * LegalizerHelper.cpp: adds G_ROTL and G_ROTR to the case list that already
#     contains G_FSHL and G_FSHR.
#   * AMDGPULegalizerInfo.cpp: G_ROTR now shares the G_FSHR rule set (legal for
#     {S32, S32}); G_ROTL now shares the G_FSHL rule sets (lowered/scalarized).
#   * AMDGPURegisterBankInfo.cpp: adds G_ROTL and G_ROTR to the case list that
#     already contains G_FSHR.
#   * VOP3Instructions.td: adds a GCNPat selecting a 32-bit rotr to
#     V_ALIGNBIT_B32_e64 with $src0 supplied as both data operands.
#   * rotate_pre_instselect.mir: new autogenerated instruction-select test.
# NOTE(review): the test function named s_rotl_rotr_i33 operates on <4 x i32>;
#   the name looks like it was meant to be a v4i32 variant -- confirm.
# NOTE(review): every function in the embedded IR module calls llvm.fshl with
#   both data operands equal (the rotate-left form); no llvm.fshr input is
#   present in this module -- confirm whether rotate-right coverage is intended.
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4146,6 +4146,8 @@
   case G_FMAXIMUM:
   case G_FSHL:
   case G_FSHR:
+  case G_ROTL:
+  case G_ROTR:
   case G_FREEZE:
   case G_SADDSAT:
   case G_SSUBSAT:
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1624,7 +1624,7 @@
     .lower();
 
   // TODO: Only Try to form v2s16 with legal packed instructions.
-  getActionDefinitionsBuilder(G_FSHR)
+  getActionDefinitionsBuilder({G_FSHR, G_ROTR})
     .legalFor({{S32, S32}})
     .lowerFor({{V2S16, V2S16}})
     .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
@@ -1632,13 +1632,13 @@
     .lower();
 
   if (ST.hasVOP3PInsts()) {
-    getActionDefinitionsBuilder(G_FSHL)
+    getActionDefinitionsBuilder({G_FSHL, G_ROTL})
       .lowerFor({{V2S16, V2S16}})
       .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
       .scalarize(0)
       .lower();
   } else {
-    getActionDefinitionsBuilder(G_FSHL)
+    getActionDefinitionsBuilder({G_FSHL, G_ROTL})
       .scalarize(0)
       .lower();
   }
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3562,6 +3562,8 @@
   case AMDGPU::G_INTRINSIC_TRUNC:
   case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
   case AMDGPU::G_FSHR: // TODO: Expand for scalar
+  case AMDGPU::G_ROTL:
+  case AMDGPU::G_ROTR:
   case AMDGPU::G_AMDGPU_FFBH_U32:
   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -341,6 +341,11 @@
 defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
 defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
 
+def : GCNPat <
+  (rotr i32:$src0, i32:$src1),
+  (V_ALIGNBIT_B32_e64 VGPR_32:$src0, VGPR_32:$src0, VGPR_32:$src1)
+>;
+
 // XXX - No FPException seems suspect but manual doesn't say it does
 let mayRaiseFPException = 0 in {
   let isCommutable = 1 in {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_instselect.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_instselect.mir
@@ -0,0 +1,752 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s
+
+--- |
+  ; ModuleID = '../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll'
+  source_filename = "../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll"
+  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+
+  define amdgpu_ps i16 @s_rotl_rotr_i16(i16 inreg %lhs, i16 inreg %amt) #0 {
+    %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %lhs, i16 %amt)
+    ret i16 %result
+  }
+
+  define amdgpu_ps i32 @s_rotl_rotr_i32(i32 inreg %lhs, i32 inreg %amt) #0 {
+    %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %lhs, i32 %amt)
+    ret i32 %result
+  }
+
+  define amdgpu_ps i64 @s_rotl_rotr_i64(i64 inreg %lhs, i64 inreg %amt) #0 {
+    %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %lhs, i64 %amt)
+    ret i64 %result
+  }
+
+  define amdgpu_ps <4 x i32> @s_rotl_rotr_i33(<4 x i32> inreg %lhs, <4 x i32> inreg %amt) #0 {
+    %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %lhs, <4 x i32> %amt)
+    ret <4 x i32> %result
+  }
+
+  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+  declare i16 @llvm.fshl.i16(i16, i16, i16) #1
+
+  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+  declare i32 @llvm.fshl.i32(i32, i32, i32) #1
+
+  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+  declare i64 @llvm.fshl.i64(i64, i64, i64) #1
+
+  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+  declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+  ; Function Attrs: convergent nounwind willreturn
+  declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #2
+
+  ; Function Attrs: convergent nounwind willreturn
+  declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #2
+
+  ; Function Attrs: convergent nounwind readnone willreturn
+  declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #3
+
+  ; Function Attrs: convergent nounwind willreturn
+  declare i1 @llvm.amdgcn.loop.i64(i64) #2
+
+  ; Function Attrs: convergent nounwind willreturn
+  declare void @llvm.amdgcn.end.cf.i64(i64) #2
+
+  attributes #0 = { "target-cpu"="gfx900" }
+  attributes #1 = { nofree nosync nounwind readnone speculatable willreturn "target-cpu"="gfx900" }
+  attributes #2 = { convergent nounwind willreturn }
+  attributes #3 = { convergent nounwind readnone willreturn }
+
+...
+--- +name: s_rotl_rotr_i16 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: sgpr, preferred-register: '' } + - { id: 1, class: sgpr, preferred-register: '' } + - { id: 2, class: sgpr, preferred-register: '' } + - { id: 3, class: sgpr, preferred-register: '' } + - { id: 4, class: sgpr_32, preferred-register: '' } + - { id: 5, class: sgpr, preferred-register: '' } + - { id: 6, class: sgpr, preferred-register: '' } + - { id: 7, class: sgpr, preferred-register: '' } + - { id: 8, class: sgpr, preferred-register: '' } + - { id: 9, class: sgpr, preferred-register: '' } + - { id: 10, class: sgpr, preferred-register: '' } + - { id: 11, class: sgpr, preferred-register: '' } + - { id: 12, class: sgpr, preferred-register: '' } + - { id: 13, class: sgpr, preferred-register: '' } + - { id: 14, class: sgpr, preferred-register: '' } + - { id: 15, class: sgpr, preferred-register: '' } + - { id: 16, class: sgpr, preferred-register: '' } + - { id: 17, class: sgpr, preferred-register: '' } + - { id: 18, class: sgpr, preferred-register: '' } + - { id: 19, class: sgpr, preferred-register: '' } + - { id: 20, class: sgpr, preferred-register: '' } + - { id: 21, class: sgpr, preferred-register: '' } + - { id: 22, class: sgpr, preferred-register: '' } + - { id: 23, class: sgpr, preferred-register: '' } + - { id: 24, class: vgpr, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: 
'' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i16 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY1]], implicit-def $scc + ; GFX9: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX9: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_AND_B32_]], 1048576, implicit-def $scc + ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_BFE_U32_]], implicit-def $scc + ; GFX9: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_SUB_I32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX9: [[S_BFE_U32_1:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc + ; GFX9: [[S_BFE_U32_2:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_AND_B32_1]], 1048576, implicit-def $scc + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_BFE_U32_1]], [[S_BFE_U32_2]], implicit-def $scc + ; GFX9: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_LSHR_B32_]], implicit-def dead $scc + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = 
COPY [[S_OR_B32_]] + ; GFX9: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX9: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i16 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY1]], implicit-def $scc + ; GFX10: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX10: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_AND_B32_]], 1048576, implicit-def $scc + ; GFX10: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_BFE_U32_]], implicit-def $scc + ; GFX10: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_SUB_I32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX10: [[S_BFE_U32_1:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc + ; GFX10: [[S_BFE_U32_2:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_AND_B32_1]], 1048576, implicit-def $scc + ; GFX10: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_BFE_U32_1]], [[S_BFE_U32_2]], implicit-def $scc + ; GFX10: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_LSHR_B32_]], implicit-def dead $scc + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]] + ; GFX10: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec + ; GFX10: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %2:sgpr(s32) = COPY $sgpr0 + %0:sgpr(s16) = G_TRUNC %2(s32) + %3:sgpr(s32) = COPY $sgpr1 + %1:sgpr(s16) = G_TRUNC %3(s32) + %8:sgpr(s16) = G_CONSTANT i16 0 + %9:sgpr(s16) = G_CONSTANT i16 15 + %15:sgpr(s32) = G_ANYEXT %8(s16) + %16:sgpr(s32) = G_ANYEXT %1(s16) + %17:sgpr(s32) = G_SUB %15, %16 + %10:sgpr(s16) = G_TRUNC 
%17(s32) + %11:sgpr(s16) = G_AND %1, %9 + %18:sgpr(s32) = G_ANYEXT %0(s16) + %20:sgpr(s32) = G_ZEXT %11(s16) + %19:sgpr(s32) = G_SHL %18, %20(s32) + %12:sgpr(s16) = G_TRUNC %19(s32) + %13:sgpr(s16) = G_AND %10, %9 + %21:sgpr(s32) = G_ZEXT %0(s16) + %23:sgpr(s32) = G_ZEXT %13(s16) + %22:sgpr(s32) = G_LSHR %21, %23(s32) + %14:sgpr(s16) = G_TRUNC %22(s32) + %5:sgpr(s16) = G_OR %12, %14 + %6:sgpr(s32) = G_ANYEXT %5(s16) + %24:vgpr(s32) = COPY %6(s32) + %7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %24(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: s_rotl_rotr_i32 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: sgpr, preferred-register: '' } + - { id: 1, class: sgpr, preferred-register: '' } + - { id: 2, class: sgpr_32, preferred-register: '' } + - { id: 3, class: vgpr, preferred-register: '' } + - { id: 4, class: sgpr, preferred-register: '' } + - { id: 5, class: sgpr, preferred-register: '' } + - { id: 6, class: sgpr, preferred-register: '' } + - { id: 7, class: vgpr, preferred-register: '' } + - { id: 8, class: vgpr, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: 
false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i32 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY1]], implicit-def $scc + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_]] + ; GFX9: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY2]], [[COPY2]], [[COPY3]], implicit $exec + ; GFX9: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_]], implicit $exec + ; GFX9: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i32 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY1]], implicit-def $scc + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_]] + ; GFX10: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY2]], [[COPY2]], [[COPY3]], implicit $exec + ; GFX10: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_]], implicit $exec + ; GFX10: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %5:sgpr(s32) = G_CONSTANT i32 0 + %6:sgpr(s32) = G_SUB %5, %1 + %7:vgpr(s32) = COPY %0(s32) + %8:vgpr(s32) = COPY %6(s32) + %3:vgpr(s32) = G_ROTR %7, %8(s32) + %4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %3(s32) + $sgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: s_rotl_rotr_i64 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: sgpr, preferred-register: '' } + - { id: 1, class: sgpr, preferred-register: '' } + - { id: 2, class: sgpr, preferred-register: '' } + - { id: 3, class: sgpr, preferred-register: '' } + - { id: 4, class: sgpr, preferred-register: '' } + - { id: 5, class: sgpr, preferred-register: '' } + - { id: 6, class: sgpr_32, preferred-register: '' } + - { id: 7, class: sgpr, preferred-register: '' } + - { id: 8, class: sgpr, preferred-register: '' } + - { id: 9, class: sgpr, preferred-register: '' } + - { id: 10, class: sgpr, preferred-register: '' } + - { id: 11, class: sgpr, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: sgpr, preferred-register: '' } + - { id: 14, class: sgpr, preferred-register: '' } + - { id: 15, class: sgpr, preferred-register: '' } + - { id: 16, class: sgpr, preferred-register: '' } + - { id: 17, class: sgpr, preferred-register: '' } + - { id: 18, class: sgpr, preferred-register: '' } + - { id: 19, class: sgpr, preferred-register: '' } + - { id: 20, class: sgpr, preferred-register: '' } + - { id: 21, class: sgpr, preferred-register: '' } + - { id: 22, class: sgpr, preferred-register: '' } + - { id: 23, class: _, preferred-register: '' } + - { id: 24, class: _, preferred-register: '' } + - { id: 25, class: sgpr, preferred-register: '' } + - { id: 26, class: sgpr, 
preferred-register: '' } + - { id: 27, class: sgpr, preferred-register: '' } + - { id: 28, class: sgpr, preferred-register: '' } + - { id: 29, class: sgpr, preferred-register: '' } + - { id: 30, class: sgpr, preferred-register: '' } + - { id: 31, class: sgpr, preferred-register: '' } + - { id: 32, class: vgpr, preferred-register: '' } + - { id: 33, class: vgpr, preferred-register: '' } +liveins: + - { reg: '$sgpr4', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr4' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; GFX9-LABEL: name: s_rotl_rotr_i64 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX9: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX9: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 63 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[S_MOV_B32_]], [[COPY2]], implicit-def $scc + ; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX9: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], 1, implicit-def $scc + ; GFX9: $scc = COPY [[S_AND_B32_]] + ; GFX9: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[S_MOV_B32_1]], [[COPY3]], implicit-def $scc, implicit $scc + ; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_SUB_U32_]], %subreg.sub0, [[S_SUBB_U32_]], %subreg.sub1 + ; GFX9: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[REG_SEQUENCE1]], [[S_MOV_B64_]], implicit-def $scc + ; GFX9: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_AND_B64_]].sub0 + ; GFX9: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE]], [[COPY6]], implicit-def $scc + ; GFX9: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[REG_SEQUENCE2]], [[S_MOV_B64_]], implicit-def $scc + ; GFX9: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_AND_B64_1]].sub0 + ; GFX9: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE]], [[COPY7]], implicit-def $scc + ; GFX9: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_LSHL_B64_]], [[S_LSHR_B64_]], implicit-def $scc + ; GFX9: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0 + ; GFX9: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1 + ; GFX9: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX9: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX9: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX9: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX9: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX9: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX10-LABEL: name: s_rotl_rotr_i64 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX10: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 63 + ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[S_MOV_B32_]], [[COPY2]], implicit-def $scc + ; GFX10: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX10: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], 1, implicit-def $scc + ; GFX10: $scc = COPY [[S_AND_B32_]] + ; GFX10: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[S_MOV_B32_1]], [[COPY3]], implicit-def $scc, implicit $scc + ; GFX10: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_SUB_U32_]], %subreg.sub0, [[S_SUBB_U32_]], %subreg.sub1 + ; GFX10: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[REG_SEQUENCE1]], [[S_MOV_B64_]], implicit-def $scc + ; GFX10: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_AND_B64_]].sub0 + ; GFX10: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE]], [[COPY6]], implicit-def $scc + ; GFX10: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[REG_SEQUENCE2]], [[S_MOV_B64_]], implicit-def $scc + ; GFX10: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_AND_B64_1]].sub0 + ; GFX10: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[REG_SEQUENCE]], [[COPY7]], implicit-def $scc + ; GFX10: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 
[[S_LSHL_B64_]], [[S_LSHR_B64_]], implicit-def $scc + ; GFX10: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0 + ; GFX10: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1 + ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX10: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX10: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX10: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX10: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX10: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %2:sgpr(s32) = COPY $sgpr0 + %3:sgpr(s32) = COPY $sgpr1 + %0:sgpr(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:sgpr(s32) = COPY $sgpr2 + %5:sgpr(s32) = COPY $sgpr3 + %1:sgpr(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %13:sgpr(s64) = G_CONSTANT i64 63 + %21:sgpr(s32) = G_CONSTANT i32 0 + %22:sgpr(s32) = G_CONSTANT i32 0 + %25:sgpr(s32), %29:sgpr(s32) = G_USUBO %21, %4 + %26:sgpr(s1) = G_TRUNC %29(s32) + %31:sgpr(s32) = G_ZEXT %26(s1) + %27:sgpr(s32), %30:sgpr(s32) = G_USUBE %22, %5, %31 + %14:sgpr(s64) = G_MERGE_VALUES %25(s32), %27(s32) + %15:sgpr(s64) = G_AND %1, %13 + %20:sgpr(s32) = G_TRUNC %15(s64) + %16:sgpr(s64) = G_SHL %0, %20(s32) + %17:sgpr(s64) = G_AND %14, %13 + %19:sgpr(s32) = G_TRUNC %17(s64) + %18:sgpr(s64) = G_LSHR %0, %19(s32) + %7:sgpr(s64) = G_OR %16, %18 + %8:sgpr(s32), %9:sgpr(s32) = G_UNMERGE_VALUES %7(s64) + %32:vgpr(s32) = COPY %8(s32) + %10:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %32(s32) + $sgpr0 = COPY %10(s32) + %33:vgpr(s32) = COPY %9(s32) + %11:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %33(s32) + $sgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... 
+--- +name: s_rotl_rotr_i33 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: sgpr, preferred-register: '' } + - { id: 3, class: sgpr, preferred-register: '' } + - { id: 4, class: sgpr, preferred-register: '' } + - { id: 5, class: sgpr, preferred-register: '' } + - { id: 6, class: sgpr, preferred-register: '' } + - { id: 7, class: sgpr, preferred-register: '' } + - { id: 8, class: sgpr, preferred-register: '' } + - { id: 9, class: sgpr, preferred-register: '' } + - { id: 10, class: sgpr_32, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: _, preferred-register: '' } + - { id: 14, class: _, preferred-register: '' } + - { id: 15, class: _, preferred-register: '' } + - { id: 16, class: sgpr, preferred-register: '' } + - { id: 17, class: sgpr, preferred-register: '' } + - { id: 18, class: sgpr, preferred-register: '' } + - { id: 19, class: sgpr, preferred-register: '' } + - { id: 20, class: _, preferred-register: '' } + - { id: 21, class: _, preferred-register: '' } + - { id: 22, class: _, preferred-register: '' } + - { id: 23, class: _, preferred-register: '' } + - { id: 24, class: _, preferred-register: '' } + - { id: 25, class: _, preferred-register: '' } + - { id: 26, class: _, preferred-register: '' } + - { id: 27, class: _, preferred-register: '' } + - { id: 28, class: vgpr, preferred-register: '' } + - { id: 29, class: vgpr, preferred-register: '' } + - { id: 30, class: vgpr, preferred-register: '' } + - { id: 31, class: vgpr, preferred-register: '' } + - { id: 32, class: sgpr, preferred-register: '' } + - { id: 33, class: sgpr, preferred-register: '' } + - { id: 34, class: sgpr, preferred-register: '' } + - { id: 35, class: sgpr, 
preferred-register: '' } + - { id: 36, class: sgpr, preferred-register: '' } + - { id: 37, class: vgpr, preferred-register: '' } + - { id: 38, class: vgpr, preferred-register: '' } + - { id: 39, class: vgpr, preferred-register: '' } + - { id: 40, class: vgpr, preferred-register: '' } + - { id: 41, class: vgpr, preferred-register: '' } + - { id: 42, class: vgpr, preferred-register: '' } + - { id: 43, class: vgpr, preferred-register: '' } + - { id: 44, class: vgpr, preferred-register: '' } +liveins: + - { reg: '$sgpr8', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr8' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + + ; GFX9-LABEL: name: s_rotl_rotr_i33 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr7 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY4]], implicit-def $scc + ; GFX9: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX9: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_]] + ; GFX9: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY8]], [[COPY8]], [[COPY9]], implicit $exec + ; GFX9: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY5]], implicit-def $scc + ; GFX9: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX9: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_1]] + ; GFX9: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY10]], [[COPY10]], [[COPY11]], implicit $exec + ; GFX9: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY6]], implicit-def $scc + ; GFX9: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GFX9: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_2]] + ; GFX9: [[V_ALIGNBIT_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY12]], [[COPY12]], [[COPY13]], implicit $exec + ; GFX9: [[S_SUB_I32_3:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY7]], implicit-def $scc + ; GFX9: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GFX9: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_3]] + ; GFX9: [[V_ALIGNBIT_B32_e64_3:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY14]], [[COPY14]], [[COPY15]], implicit $exec + ; GFX9: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_]], implicit $exec + ; GFX9: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX9: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_1]], 
implicit $exec + ; GFX9: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX9: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_2]], implicit $exec + ; GFX9: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX9: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_3]], implicit $exec + ; GFX9: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + ; GFX10-LABEL: name: s_rotl_rotr_i33 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr7 + ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY4]], implicit-def $scc + ; GFX10: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX10: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_]] + ; GFX10: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY8]], [[COPY8]], [[COPY9]], implicit $exec + ; GFX10: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY5]], implicit-def $scc + ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX10: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_1]] + ; GFX10: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY10]], [[COPY10]], [[COPY11]], implicit $exec + ; GFX10: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY6]], implicit-def $scc + ; GFX10: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GFX10: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_2]] + ; GFX10: 
[[V_ALIGNBIT_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY12]], [[COPY12]], [[COPY13]], implicit $exec + ; GFX10: [[S_SUB_I32_3:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_MOV_B32_]], [[COPY7]], implicit-def $scc + ; GFX10: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GFX10: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_SUB_I32_3]] + ; GFX10: [[V_ALIGNBIT_B32_e64_3:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY14]], [[COPY14]], [[COPY15]], implicit $exec + ; GFX10: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_]], implicit $exec + ; GFX10: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX10: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_1]], implicit $exec + ; GFX10: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX10: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_2]], implicit $exec + ; GFX10: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX10: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_ALIGNBIT_B32_e64_3]], implicit $exec + ; GFX10: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + %2:sgpr(s32) = COPY $sgpr0 + %3:sgpr(s32) = COPY $sgpr1 + %4:sgpr(s32) = COPY $sgpr2 + %5:sgpr(s32) = COPY $sgpr3 + %6:sgpr(s32) = COPY $sgpr4 + %7:sgpr(s32) = COPY $sgpr5 + %8:sgpr(s32) = COPY $sgpr6 + %9:sgpr(s32) = COPY $sgpr7 + %32:sgpr(s32) = G_CONSTANT i32 0 + %36:sgpr(s32) = G_SUB %32, %6 + %37:vgpr(s32) = COPY %2(s32) + %38:vgpr(s32) = COPY %36(s32) + %28:vgpr(s32) = G_ROTR %37, %38(s32) + %35:sgpr(s32) = G_SUB %32, %7 + %39:vgpr(s32) = COPY %3(s32) + %40:vgpr(s32) = COPY %35(s32) + %29:vgpr(s32) = G_ROTR %39, %40(s32) + %34:sgpr(s32) = G_SUB %32, %8 + %41:vgpr(s32) = COPY %4(s32) + %42:vgpr(s32) = COPY %34(s32) + %30:vgpr(s32) = G_ROTR %41, %42(s32) + %33:sgpr(s32) = G_SUB %32, %9 + %43:vgpr(s32) = COPY %5(s32) + %44:vgpr(s32) = COPY %33(s32) + %31:vgpr(s32) = 
G_ROTR %43, %44(s32) + %16:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %28(s32) + $sgpr0 = COPY %16(s32) + %17:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %29(s32) + $sgpr1 = COPY %17(s32) + %18:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %30(s32) + $sgpr2 = COPY %18(s32) + %19:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %31(s32) + $sgpr3 = COPY %19(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_legalization.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_legalization.mir @@ -0,0 +1,598 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s + +--- | + ; ModuleID = '../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll' + source_filename = "../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + + define amdgpu_ps i16 @s_rotl_rotr_i16(i16 inreg %lhs, i16 inreg %amt) #0 { + %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %lhs, i16 %amt) + ret i16 %result + } + + define amdgpu_ps i32 @s_rotl_rotr_i32(i32 inreg %lhs, i32 inreg %amt) #0 { + %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %lhs, i32 %amt) + ret i32 %result + } + + define amdgpu_ps i64 @s_rotl_rotr_i64(i64 inreg %lhs, i64 inreg %amt) #0 { + %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %lhs, i64 %amt) + ret i64 %result + } + + define amdgpu_ps <4 x i32> @s_rotl_rotr_i33(<4 x i32> inreg 
%lhs, <4 x i32> inreg %amt) #0 { + %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %lhs, <4 x i32> %amt) + ret <4 x i32> %result + } + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i16 @llvm.fshl.i16(i16, i16, i16) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i32 @llvm.fshl.i32(i32, i32, i32) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i64 @llvm.fshl.i64(i64, i64, i64) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #2 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #2 + + ; Function Attrs: convergent nounwind readnone willreturn + declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #3 + + ; Function Attrs: convergent nounwind willreturn + declare i1 @llvm.amdgcn.loop.i64(i64) #2 + + ; Function Attrs: convergent nounwind willreturn + declare void @llvm.amdgcn.end.cf.i64(i64) #2 + + attributes #0 = { "target-cpu"="gfx900" } + attributes #1 = { nofree nosync nounwind readnone speculatable willreturn "target-cpu"="gfx900" } + attributes #2 = { convergent nounwind willreturn } + attributes #3 = { convergent nounwind readnone willreturn } + +... 
+--- +name: s_rotl_rotr_i16 +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: sgpr_32, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i16 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i16 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX10: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]] + ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX10: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX10: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]] + ; GFX10: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16) + ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC 
%3(s32) + %5:_(s16) = G_ROTL %0, %1(s16) + %6:_(s32) = G_ANYEXT %5(s16) + %7:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... +--- +name: s_rotl_rotr_i32 +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: sgpr_32, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i32 + ; GFX9: liveins: $sgpr0, 
$sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[SUB]](s32) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i32 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]] + ; GFX10: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[SUB]](s32) + ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = G_ROTL %0, %1(s32) + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %3(s32) + $sgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: s_rotl_rotr_i64 +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: sgpr_32, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } + - { id: 9, class: _, preferred-register: '' } + - { id: 10, class: _, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr4', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr4' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 
+body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; GFX9-LABEL: name: s_rotl_rotr_i64 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[COPY2]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[COPY3]], [[USUBO1]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC1]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; GFX9: $sgpr1 = COPY [[INT1]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX10-LABEL: name: s_rotl_rotr_i64 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) 
= COPY $sgpr1 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX10: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; GFX10: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[COPY2]] + ; GFX10: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[COPY3]], [[USUBO1]] + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX10: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] + ; GFX10: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX10: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[TRUNC]](s32) + ; GFX10: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX10: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[TRUNC1]](s32) + ; GFX10: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; GFX10: $sgpr1 = COPY [[INT1]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = COPY $sgpr1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $sgpr2 + %5:_(s32) = COPY $sgpr3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s64) = G_ROTL %0, %1(s64) + %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %7(s64) + %10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32) + $sgpr0 = COPY %10(s32) + %11:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) + $sgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... +--- +name: s_rotl_rotr_i33 +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } + - { id: 9, class: _, preferred-register: '' } + - { id: 10, class: sgpr_32, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: _, preferred-register: '' } + - { id: 14, class: _, preferred-register: '' } + - { id: 15, class: _, preferred-register: '' } + - { id: 16, class: _, preferred-register: '' } + - { id: 17, class: _, preferred-register: '' } + - { id: 18, class: _, preferred-register: '' } + - { id: 19, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr8', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 
0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr8' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + + ; GFX9-LABEL: name: s_rotl_rotr_i33 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY4]] + ; GFX9: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[SUB]](s32) + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY5]] + ; GFX9: [[ROTR1:%[0-9]+]]:_(s32) = G_ROTR [[COPY1]], [[SUB1]](s32) + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY6]] + ; GFX9: [[ROTR2:%[0-9]+]]:_(s32) = G_ROTR [[COPY2]], [[SUB2]](s32) + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY7]] + ; GFX9: [[ROTR3:%[0-9]+]]:_(s32) = G_ROTR [[COPY3]], [[SUB3]](s32) + ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR1]](s32) + ; GFX9: $sgpr1 = COPY [[INT1]](s32) + ; 
GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR2]](s32) + ; GFX9: $sgpr2 = COPY [[INT2]](s32) + ; GFX9: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR3]](s32) + ; GFX9: $sgpr3 = COPY [[INT3]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + ; GFX10-LABEL: name: s_rotl_rotr_i33 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY4]] + ; GFX10: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[SUB]](s32) + ; GFX10: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY5]] + ; GFX10: [[ROTR1:%[0-9]+]]:_(s32) = G_ROTR [[COPY1]], [[SUB1]](s32) + ; GFX10: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY6]] + ; GFX10: [[ROTR2:%[0-9]+]]:_(s32) = G_ROTR [[COPY2]], [[SUB2]](s32) + ; GFX10: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY7]] + ; GFX10: [[ROTR3:%[0-9]+]]:_(s32) = G_ROTR [[COPY3]], [[SUB3]](s32) + ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR1]](s32) + ; GFX10: $sgpr1 = COPY [[INT1]](s32) + ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR2]](s32) + ; GFX10: $sgpr2 = COPY [[INT2]](s32) + ; GFX10: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR3]](s32) + ; GFX10: 
$sgpr3 = COPY [[INT3]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = COPY $sgpr1 + %4:_(s32) = COPY $sgpr2 + %5:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32) + %6:_(s32) = COPY $sgpr4 + %7:_(s32) = COPY $sgpr5 + %8:_(s32) = COPY $sgpr6 + %9:_(s32) = COPY $sgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32), %8(s32), %9(s32) + %11:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>) + %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %11(<4 x s32>) + %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %12(s32) + $sgpr0 = COPY %16(s32) + %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %13(s32) + $sgpr1 = COPY %17(s32) + %18:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %14(s32) + $sgpr2 = COPY %18(s32) + %19:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %15(s32) + $sgpr3 = COPY %19(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_regbankselect.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/rotate_pre_regbankselect.mir @@ -0,0 +1,717 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck -check-prefix=GFX10 %s + +--- | + ; ModuleID = '../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll' + source_filename = "../llvm/test/CodeGen/AMDGPU/GlobalISel/rotl_rotr.ll" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + + define amdgpu_ps i16 @s_rotl_rotr_i16(i16 inreg %lhs, i16 inreg %amt) #0 { + %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %lhs, i16 %amt) + ret i16 %result + } + + define amdgpu_ps i32 @s_rotl_rotr_i32(i32 inreg %lhs, i32 inreg %amt) #0 { + %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %lhs, i32 %amt) + ret i32 %result + } + + define amdgpu_ps i64 @s_rotl_rotr_i64(i64 inreg %lhs, i64 inreg %amt) #0 { + %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %lhs, i64 %amt) + ret i64 %result + } + + define amdgpu_ps <4 x i32> @s_rotl_rotr_i33(<4 x i32> inreg %lhs, <4 x i32> inreg %amt) #0 { + %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %lhs, <4 x i32> %amt) + ret <4 x i32> %result + } + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i16 @llvm.fshl.i16(i16, i16, i16) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i32 @llvm.fshl.i32(i32, i32, i32) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare i64 @llvm.fshl.i64(i64, i64, 
i64) #1 + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #2 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #2 + + ; Function Attrs: convergent nounwind readnone willreturn + declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #3 + + ; Function Attrs: convergent nounwind willreturn + declare i1 @llvm.amdgcn.loop.i64(i64) #2 + + ; Function Attrs: convergent nounwind willreturn + declare void @llvm.amdgcn.end.cf.i64(i64) #2 + + attributes #0 = { "target-cpu"="gfx900" } + attributes #1 = { nofree nosync nounwind readnone speculatable willreturn "target-cpu"="gfx900" } + attributes #2 = { convergent nounwind willreturn } + attributes #3 = { convergent nounwind readnone willreturn } + +... +--- +name: s_rotl_rotr_i16 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: sgpr_32, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } + - { id: 9, class: _, preferred-register: '' } + - { id: 10, class: _, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: _, preferred-register: '' } + - { id: 14, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: 
false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i16 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; GFX9: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 15 + ; GFX9: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; GFX9: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32) + ; GFX9: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT 
[[AND]](s16) + ; GFX9: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT2]], [[ZEXT]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SHL]](s32) + ; GFX9: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[C1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) + ; GFX9: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[AND1]](s16) + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[ZEXT1]], [[ZEXT2]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:sgpr(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC3]], [[TRUNC4]] + ; GFX9: [[ANYEXT3:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT3]](s32) + ; GFX9: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY2]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i16 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[C:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 0 + ; GFX10: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 15 + ; GFX10: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[C]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; GFX10: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] + ; GFX10: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32) + ; GFX10: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX10: [[ANYEXT2:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[AND]](s16) + ; GFX10: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT2]], [[ZEXT]](s32) + ; GFX10: [[TRUNC3:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SHL]](s32) + ; GFX10: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[C1]] + ; GFX10: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) + ; GFX10: 
[[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[AND1]](s16) + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[ZEXT1]], [[ZEXT2]](s32) + ; GFX10: [[TRUNC4:%[0-9]+]]:sgpr(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC3]], [[TRUNC4]] + ; GFX10: [[ANYEXT3:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[OR]](s16) + ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT3]](s32) + ; GFX10: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY2]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $sgpr1 + %1:_(s16) = G_TRUNC %3(s32) + %8:_(s16) = G_CONSTANT i16 0 + %9:_(s16) = G_CONSTANT i16 15 + %10:_(s16) = G_SUB %8, %1 + %11:_(s16) = G_AND %1, %9 + %12:_(s16) = G_SHL %0, %11(s16) + %13:_(s16) = G_AND %10, %9 + %14:_(s16) = G_LSHR %0, %13(s16) + %5:_(s16) = G_OR %12, %14 + %6:_(s32) = G_ANYEXT %5(s16) + %7:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %6(s32) + $sgpr0 = COPY %7(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: s_rotl_rotr_i32 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: sgpr_32, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr2', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr2' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_rotl_rotr_i32 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; 
GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY1]] + ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SUB]](s32) + ; GFX9: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY2]], [[COPY3]](s32) + ; GFX9: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX10-LABEL: name: s_rotl_rotr_i32 + ; GFX10: liveins: $sgpr0, $sgpr1 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY1]] + ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SUB]](s32) + ; GFX10: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY2]], [[COPY3]](s32) + ; GFX10: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %5:_(s32) = G_CONSTANT i32 0 + %6:_(s32) = G_SUB %5, %1 + %3:_(s32) = G_ROTR %0, %6(s32) + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %3(s32) + $sgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0 + +... 
+--- +name: s_rotl_rotr_i64 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: sgpr_32, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } + - { id: 9, class: _, preferred-register: '' } + - { id: 10, class: _, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: _, preferred-register: '' } + - { id: 14, class: _, preferred-register: '' } + - { id: 15, class: _, preferred-register: '' } + - { id: 16, class: _, preferred-register: '' } + - { id: 17, class: _, preferred-register: '' } + - { id: 18, class: _, preferred-register: '' } + - { id: 19, class: _, preferred-register: '' } + - { id: 20, class: _, preferred-register: '' } + - { id: 21, class: _, preferred-register: '' } + - { id: 22, class: _, preferred-register: '' } + - { id: 23, class: _, preferred-register: '' } + - { id: 24, class: _, preferred-register: '' } + - { id: 25, class: _, preferred-register: '' } + - { id: 26, class: _, preferred-register: '' } + - { id: 27, class: _, preferred-register: '' } + - { id: 28, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr4', virtual-reg: '%6' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + 
hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr4' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; GFX9-LABEL: name: s_rotl_rotr_i64 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX9: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 63 + ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[C1]], [[COPY2]] + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBO1]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; GFX9: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[C2]], [[COPY3]], [[ZEXT]] + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) + ; GFX9: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[USUBO]](s32), 
[[USUBE]](s32) + ; GFX9: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[MV1]], [[C]] + ; GFX9: [[TRUNC2:%[0-9]+]]:sgpr(s32) = G_TRUNC [[AND]](s64) + ; GFX9: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[MV]], [[TRUNC2]](s32) + ; GFX9: [[AND1:%[0-9]+]]:sgpr(s64) = G_AND [[MV2]], [[C]] + ; GFX9: [[TRUNC3:%[0-9]+]]:sgpr(s32) = G_TRUNC [[AND1]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[MV]], [[TRUNC3]](s32) + ; GFX9: [[OR:%[0-9]+]]:sgpr(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX9: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GFX9: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY4]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GFX9: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GFX9: $sgpr1 = COPY [[INT1]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX10-LABEL: name: s_rotl_rotr_i64 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX10: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 63 + ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[C1]], [[COPY2]] + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBO1]](s32) + ; GFX10: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; GFX10: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[C2]], [[COPY3]], [[ZEXT]] + ; GFX10: 
[[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32) + ; GFX10: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX10: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[MV1]], [[C]] + ; GFX10: [[TRUNC2:%[0-9]+]]:sgpr(s32) = G_TRUNC [[AND]](s64) + ; GFX10: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[MV]], [[TRUNC2]](s32) + ; GFX10: [[AND1:%[0-9]+]]:sgpr(s64) = G_AND [[MV2]], [[C]] + ; GFX10: [[TRUNC3:%[0-9]+]]:sgpr(s32) = G_TRUNC [[AND1]](s64) + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[MV]], [[TRUNC3]](s32) + ; GFX10: [[OR:%[0-9]+]]:sgpr(s64) = G_OR [[SHL]], [[LSHR]] + ; GFX10: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GFX10: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY4]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GFX10: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GFX10: $sgpr1 = COPY [[INT1]](s32) + ; GFX10: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = COPY $sgpr1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $sgpr2 + %5:_(s32) = COPY $sgpr3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %13:_(s64) = G_CONSTANT i64 63 + %21:_(s32) = G_CONSTANT i32 0 + %22:_(s32) = G_CONSTANT i32 0 + %25:_(s32), %26:_(s1) = G_USUBO %21, %4 + %27:_(s32), %28:_(s1) = G_USUBE %22, %5, %26 + %14:_(s64) = G_MERGE_VALUES %25(s32), %27(s32) + %15:_(s64) = G_AND %1, %13 + %20:_(s32) = G_TRUNC %15(s64) + %16:_(s64) = G_SHL %0, %20(s32) + %17:_(s64) = G_AND %14, %13 + %19:_(s32) = G_TRUNC %17(s64) + %18:_(s64) = G_LSHR %0, %19(s32) + %7:_(s64) = G_OR %16, %18 + %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %7(s64) + %10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32) + $sgpr0 = COPY %10(s32) + %11:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.readfirstlane), %9(s32) + $sgpr1 = COPY %11(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + +... +--- +name: s_rotl_rotr_i33 +alignment: 1 +exposesReturnsTwice: false +legalized: true +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: _, preferred-register: '' } + - { id: 8, class: _, preferred-register: '' } + - { id: 9, class: _, preferred-register: '' } + - { id: 10, class: sgpr_32, preferred-register: '' } + - { id: 11, class: _, preferred-register: '' } + - { id: 12, class: _, preferred-register: '' } + - { id: 13, class: _, preferred-register: '' } + - { id: 14, class: _, preferred-register: '' } + - { id: 15, class: _, preferred-register: '' } + - { id: 16, class: _, preferred-register: '' } + - { id: 17, class: _, preferred-register: '' } + - { id: 18, class: _, preferred-register: '' } + - { id: 19, class: _, preferred-register: '' } + - { id: 20, class: _, preferred-register: '' } + - { id: 21, class: _, preferred-register: '' } + - { id: 22, class: _, preferred-register: '' } + - { id: 23, class: _, preferred-register: '' } + - { id: 24, class: _, preferred-register: '' } + - { id: 25, class: _, preferred-register: '' } + - { id: 26, class: _, preferred-register: '' } + - { id: 27, class: _, preferred-register: '' } + - { id: 28, class: _, preferred-register: '' } + - { id: 29, class: _, preferred-register: '' } + - { id: 30, class: _, preferred-register: '' } + - { id: 31, class: _, preferred-register: '' } + - { id: 32, class: _, preferred-register: '' } + - { id: 33, class: _, preferred-register: '' } + 
- { id: 34, class: _, preferred-register: '' } + - { id: 35, class: _, preferred-register: '' } + - { id: 36, class: _, preferred-register: '' } +liveins: + - { reg: '$sgpr8', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: true + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$private_rsrc_reg' + frameOffsetReg: '$fp_reg' + stackPtrOffsetReg: '$sp_reg' + argumentInfo: + privateSegmentWaveByteOffset: { reg: '$sgpr8' } + mode: + ieee: false + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 10 +body: | + bb.1 (%ir-block.0): + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + + ; GFX9-LABEL: name: s_rotl_rotr_i33 + ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr7 + ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY4]] + ; GFX9: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[SUB]](s32) + ; GFX9: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY8]], [[COPY9]](s32) + ; GFX9: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY5]] + ; GFX9: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[SUB1]](s32) + ; GFX9: [[ROTR1:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY10]], [[COPY11]](s32) + ; GFX9: [[SUB2:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY6]] + ; GFX9: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[SUB2]](s32) + ; GFX9: [[ROTR2:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY12]], [[COPY13]](s32) + ; GFX9: [[SUB3:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY7]] + ; GFX9: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[SUB3]](s32) + ; GFX9: [[ROTR3:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY14]], [[COPY15]](s32) + ; GFX9: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX9: $sgpr0 = COPY [[INT]](s32) + ; GFX9: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR1]](s32) + ; GFX9: $sgpr1 = COPY [[INT1]](s32) + ; GFX9: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR2]](s32) + ; GFX9: $sgpr2 = COPY [[INT2]](s32) + ; GFX9: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR3]](s32) + ; GFX9: $sgpr3 = COPY [[INT3]](s32) + ; GFX9: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + ; GFX10-LABEL: name: s_rotl_rotr_i33 + ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GFX10: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY4]] + ; GFX10: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[SUB]](s32) + ; GFX10: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY8]], [[COPY9]](s32) + ; GFX10: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY5]] + ; GFX10: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GFX10: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[SUB1]](s32) + ; GFX10: [[ROTR1:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY10]], [[COPY11]](s32) + ; GFX10: [[SUB2:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY6]] + ; GFX10: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; GFX10: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[SUB2]](s32) + ; GFX10: [[ROTR2:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY12]], [[COPY13]](s32) + ; GFX10: [[SUB3:%[0-9]+]]:sgpr(s32) = G_SUB [[C]], [[COPY7]] + ; GFX10: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) + ; GFX10: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[SUB3]](s32) + ; GFX10: [[ROTR3:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY14]], [[COPY15]](s32) + ; GFX10: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR]](s32) + ; GFX10: $sgpr0 = COPY [[INT]](s32) + ; GFX10: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR1]](s32) + ; GFX10: $sgpr1 = COPY [[INT1]](s32) + ; GFX10: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR2]](s32) + ; GFX10: $sgpr2 = COPY [[INT2]](s32) + ; GFX10: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[ROTR3]](s32) + ; GFX10: $sgpr3 = COPY [[INT3]](s32) + ; GFX10: 
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = COPY $sgpr1 + %4:_(s32) = COPY $sgpr2 + %5:_(s32) = COPY $sgpr3 + %6:_(s32) = COPY $sgpr4 + %7:_(s32) = COPY $sgpr5 + %8:_(s32) = COPY $sgpr6 + %9:_(s32) = COPY $sgpr7 + %32:_(s32) = G_CONSTANT i32 0 + %36:_(s32) = G_SUB %32, %6 + %28:_(s32) = G_ROTR %2, %36(s32) + %35:_(s32) = G_SUB %32, %7 + %29:_(s32) = G_ROTR %3, %35(s32) + %34:_(s32) = G_SUB %32, %8 + %30:_(s32) = G_ROTR %4, %34(s32) + %33:_(s32) = G_SUB %32, %9 + %31:_(s32) = G_ROTR %5, %33(s32) + %16:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %28(s32) + $sgpr0 = COPY %16(s32) + %17:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %29(s32) + $sgpr1 = COPY %17(s32) + %18:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %30(s32) + $sgpr2 = COPY %18(s32) + %19:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %31(s32) + $sgpr3 = COPY %19(s32) + SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3 + +...