Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4146,6 +4146,8 @@
   case G_FMAXIMUM:
   case G_FSHL:
   case G_FSHR:
+  case G_ROTL:
+  case G_ROTR:
   case G_FREEZE:
   case G_SADDSAT:
   case G_SSUBSAT:
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -621,7 +621,7 @@
 // rotr pattern
 class ROTRPattern : AMDGPUPat <
   (rotr i32:$src0, i32:$src1),
-  (BIT_ALIGN $src0, $src0, $src1)
+  (BIT_ALIGN i32:$src0, i32:$src0, i32:$src1)
 >;
 
 // Special conversion patterns
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1624,7 +1624,7 @@
     .lower();
 
   // TODO: Only Try to form v2s16 with legal packed instructions.
-  getActionDefinitionsBuilder(G_FSHR)
+  getActionDefinitionsBuilder({G_FSHR, G_ROTR})
     .legalFor({{S32, S32}})
     .lowerFor({{V2S16, V2S16}})
     .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
@@ -1632,13 +1632,13 @@
     .lower();
 
   if (ST.hasVOP3PInsts()) {
-    getActionDefinitionsBuilder(G_FSHL)
+    getActionDefinitionsBuilder({G_FSHL, G_ROTL})
       .lowerFor({{V2S16, V2S16}})
       .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
       .scalarize(0)
       .lower();
   } else {
-    getActionDefinitionsBuilder(G_FSHL)
+    getActionDefinitionsBuilder({G_FSHL, G_ROTL})
       .scalarize(0)
       .lower();
   }
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3562,6 +3562,8 @@
   case AMDGPU::G_INTRINSIC_TRUNC:
   case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
   case AMDGPU::G_FSHR: // TODO: Expand for scalar
+  case AMDGPU::G_ROTL:
+  case AMDGPU::G_ROTR:
   case AMDGPU::G_AMDGPU_FFBH_U32:
   case AMDGPU::G_AMDGPU_FMIN_LEGACY:
   case AMDGPU::G_AMDGPU_FMAX_LEGACY:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-rotr.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-rotr.mir
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -run-pass=instruction-select %s -o - | FileCheck %s --check-prefix=GFX
+---
+name: rotr_i32
+legalized: true
+regBankSelected: true
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX-LABEL: name: rotr_i32
+    ; GFX: liveins: $vgpr0, $vgpr1
+    ; GFX: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], [[COPY1]], implicit $exec
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[V_ALIGNBIT_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_ROTR %0, %1(s32)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
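The legalizer changes above make G_ROTR legal only for {S32, S32}, where it selects directly to V_ALIGNBIT_B32 (see the inst-select test), and lower every other case; G_ROTL is never legal and is always lowered. This works because a left rotate is a right rotate by the negated amount, which is exactly the G_SUB 0, %amt pattern the legalizer tests below check for. A minimal sketch of that identity in plain C++ (illustrative names, not the LLVM API):

  #include <cstdint>

  // Rotate right; the amount is taken modulo 32, matching G_ROTR semantics.
  uint32_t rotr32(uint32_t X, uint32_t K) {
    K &= 31;
    return (X >> K) | (X << ((32 - K) & 31)); // "& 31" avoids a shift by 32 when K == 0
  }

  // Rotate left, lowered the way this patch does it: rotl(x, k) == rotr(x, -k).
  uint32_t rotl32(uint32_t X, uint32_t K) {
    return rotr32(X, 0u - K);
  }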
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
@@ -0,0 +1,302 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX6
+# RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX8
+
+---
+name: rotl_i16
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotl_i16
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY2]]
+    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[ZEXT]](s32)
+    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+    ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX6: SI_RETURN_TO_EPILOG implicit [[ANYEXT]](s32)
+    ; GFX8-LABEL: name: rotl_i16
+    ; GFX8: liveins: $sgpr0, $sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16)
+    ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX8: SI_RETURN_TO_EPILOG implicit [[ANYEXT]](s32)
+    %2:_(s32) = COPY $sgpr0
+    %0:_(s16) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $sgpr1
+    %1:_(s16) = G_TRUNC %3(s32)
+    %5:_(s16) = G_ROTL %0, %1(s16)
+    %4:_(s32) = G_ANYEXT %5(s16)
+    SI_RETURN_TO_EPILOG implicit %4
+
+...
+---
+name: rotl_i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotl_i32
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+    ; GFX: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[SUB]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[ROTR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_ROTL %0, %1(s32)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotl_i64
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    ; GFX-LABEL: name: rotl_i64
+    ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+    ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+    ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+    ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+    ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32)
+    ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+    ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+    ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32)
+    ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]]
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[OR]](s64)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_ROTL %0, %1(s64)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotl_v4i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    ; GFX-LABEL: name: rotl_v4i32
+    ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV4]]
+    ; GFX: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[UV]], [[SUB]](s32)
+    ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV5]]
+    ; GFX: [[ROTR1:%[0-9]+]]:_(s32) = G_ROTR [[UV1]], [[SUB1]](s32)
+    ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV6]]
+    ; GFX: [[ROTR2:%[0-9]+]]:_(s32) = G_ROTR [[UV2]], [[SUB2]](s32)
+    ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV7]]
+    ; GFX: [[ROTR3:%[0-9]+]]:_(s32) = G_ROTR [[UV3]], [[SUB3]](s32)
+    ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ROTR]](s32), [[ROTR1]](s32), [[ROTR2]](s32), [[ROTR3]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotr_i16
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotr_i16
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY2]]
+    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[ZEXT]](s32)
+    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[ZEXT1]](s32)
+    ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX6: SI_RETURN_TO_EPILOG implicit [[ANYEXT]](s32)
+    ; GFX8-LABEL: name: rotr_i16
+    ; GFX8: liveins: $sgpr0, $sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16)
+    ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX8: SI_RETURN_TO_EPILOG implicit [[ANYEXT]](s32)
+    %2:_(s32) = COPY $sgpr0
+    %0:_(s16) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $sgpr1
+    %1:_(s16) = G_TRUNC %3(s32)
+    %5:_(s16) = G_ROTR %0, %1(s16)
+    %4:_(s32) = G_ANYEXT %5(s16)
+    SI_RETURN_TO_EPILOG implicit %4
+
+...
+---
+name: rotr_i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotr_i32
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[COPY1]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[ROTR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_ROTR %0, %1(s32)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotr_i64
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    ; GFX-LABEL: name: rotr_i64
+    ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+    ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+    ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+    ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+    ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32)
+    ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+    ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+    ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC1]](s32)
+    ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[OR]](s64)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_ROTR %0, %1(s64)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotr_v4i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    ; GFX-LABEL: name: rotr_v4i32
+    ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[UV]], [[UV4]](s32)
+    ; GFX: [[ROTR1:%[0-9]+]]:_(s32) = G_ROTR [[UV1]], [[UV5]](s32)
+    ; GFX: [[ROTR2:%[0-9]+]]:_(s32) = G_ROTR [[UV2]], [[UV6]](s32)
+    ; GFX: [[ROTR3:%[0-9]+]]:_(s32) = G_ROTR [[UV3]], [[UV7]](s32)
+    ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ROTR]](s32), [[ROTR1]](s32), [[ROTR2]](s32), [[ROTR3]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-rotr.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-rotr.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -run-pass=regbankselect %s -o - | FileCheck %s --check-prefix=GFX
+
+---
+name: rotr_i32_s
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotr_i32_s
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; GFX: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY2]], [[COPY3]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[ROTR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_ROTR %0, %1(s32)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
+---
+name: rotr_i32_v
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX-LABEL: name: rotr_i32_v
+    ; GFX: liveins: $vgpr0, $vgpr1
+    ; GFX: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX: [[ROTR:%[0-9]+]]:vgpr(s32) = G_ROTR [[COPY]], [[COPY1]](s32)
+    ; GFX: SI_RETURN_TO_EPILOG implicit [[ROTR]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_ROTR %0, %1(s32)
+    SI_RETURN_TO_EPILOG implicit %2
+
+...
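Two notes on the tests above. In legalize-rotl-rotr.mir, types with no legal rotate are expanded into shift/or sequences: the s16 cases mask each amount to 4 bits (the i16 15 constants) and, on GFX6, perform the shifts in 32-bit registers with an explicit zero-extension mask (the i32 65535), while the s64 cases follow the same shape with an s64 63 mask and a G_USUBO/G_USUBE pair to negate the amount across the two 32-bit halves. A minimal sketch of the s16 expansion in plain C++ (illustrative names, not the LLVM API):

  #include <cstdint>

  // rotr_i16 as legalized: both shift amounts are masked to 4 bits, and the
  // amount for the back-shift is (0 - K) & 15, matching the G_SUB/G_AND pairs
  // in the checks above.
  uint16_t rotr16(uint16_t X, uint16_t K) {
    unsigned Fwd = K & 15u;
    unsigned Back = (0u - K) & 15u;
    return (uint16_t)(((unsigned)X >> Fwd) | ((unsigned)X << Back));
  }

In regbankselect-rotr.mir, G_ROTR sits in the same mapping list as G_FSHR (still marked "TODO: Expand for scalar"), so it is treated as VALU-only: the all-SGPR test inserts copies to VGPRs before the rotate rather than expanding it on the scalar unit.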