Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -413,7 +413,9 @@ (ops node:$src0, node:$src1), (Op $src0, $src1), [{ return !N->isDivergent(); }] ->; +> { + let GISelPredicateCode = [{return true;}]; +} let Defs = [SCC] in { // Carry out goes to SCC let isCommutable = 1 in { @@ -535,22 +537,22 @@ let Defs = [SCC] in { // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", - [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", - [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", - [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", - [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64", - [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; } // End Defs = [SCC] Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -541,14 +541,17 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; } // End SubtargetPredicate = isGFX6GFX7 -let SubtargetPredicate = isGFX6GFX7GFX10 in { let isCommutable = 1 in { +let SubtargetPredicate = isGFX6GFX7GFX10 in { defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; -defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>; -defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>; -defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>; -} // End isCommutable = 1 } // End SubtargetPredicate = isGFX6GFX7GFX10 +let SubtargetPredicate = isGFX6GFX7 in { +defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN, srl>; +defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN, sra>; +defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN, shl>; +} // End SubtargetPredicate = isGFX6GFX7 +} // End isCommutable = 1 + class DivergentBinOp : GCNPat< Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -385,10 +385,12 @@ } let SchedRW = [Write64Bit] in { -let SubtargetPredicate = isGFX6GFX7GFX10 in { +let SubtargetPredicate = isGFX6GFX7 in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile, shl>; def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile, srl>; def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, sra>; +} // End SubtargetPredicate = isGFX6GFX7 +let SubtargetPredicate = isGFX6GFX7GFX10 in { def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX6GFX7GFX10 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -1,5 +1,4 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s @@ -17,18 +16,18 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, implicit %9 ; GFX9-LABEL: name: add_s32 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_ASHR %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_ASHR %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_ASHR %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -123,8 +123,8 @@ ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc - ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_CTPOP %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_LSHR %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_LSHR %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_LSHR %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_SHL %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_SHL %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_SHL %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -1,5 +1,4 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s @@ -20,9 +19,9 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %9 ; GFX9-LABEL: name: sub_s32 @@ -30,9 +29,9 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX9: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] ; GFX10-LABEL: name: sub_s32 @@ -41,9 +40,9 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX10: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 Index: llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -42,8 +42,8 @@ ; CI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 ; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, [[LHS]] ; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} Index: llvm/test/CodeGen/AMDGPU/bfe-patterns.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -24,8 +24,11 @@ ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] -; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] -; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] +; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] +; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] + +; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] +; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] ; GCN: [[BFE]] ; GCN: [[SHL]] @@ -97,8 +100,11 @@ ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] -; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] -; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] +; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] +; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] + +; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] +; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] ; GCN: [[BFE]] ; GCN: [[SHL]] Index: llvm/test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/commute-shifts.ll +++ llvm/test/CodeGen/AMDGPU/commute-shifts.ll @@ -17,7 +17,7 @@ ; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; SI-NEXT: v_and_b32_e32 v0, 7, v0 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; SI-NEXT: v_lshr_b32_e32 v0, v2, v0 ; SI-NEXT: v_and_b32_e32 v0, 1, v0 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc Index: llvm/test/CodeGen/AMDGPU/extract-lowbits.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -169,8 +169,8 @@ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; SI-NEXT: v_lshl_b32_e32 v0, v0, v1 +; SI-NEXT: v_lshr_b32_e32 v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: bzhi32_d1_indexzext: Index: llvm/test/CodeGen/AMDGPU/inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=PRE-GFX8 %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s ; CHECK-LABEL: {{^}}inline_asm: ; CHECK: s_endpgm @@ -241,7 +241,8 @@ ; CHECK: ; def v0 ; CHECK: v_mov_b32_e32 v1, v0 ; CHECK: ; def v0 -; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 +; PRE-GFX8: v_lshl_b32_e32 v{{[0-9]+}}, v1, v0 +; GFX8: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 define amdgpu_kernel void @muliple_def_phys_vgpr() { entry: %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() Index: llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s @@ -124,7 +126,7 @@ ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 -; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v4 +; CI-NEXT: v_lshr_b32_e32 v3, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -491,9 +493,9 @@ ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: v_and_b32_e32 v5, s8, v5 ; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v3 -; CI-NEXT: v_lshrrev_b32_e32 v5, v9, v7 +; CI-NEXT: v_lshr_b32_e32 v5, v7, v9 ; CI-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; CI-NEXT: v_lshrrev_b32_e32 v4, v8, v6 +; CI-NEXT: v_lshr_b32_e32 v4, v6, v8 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v3, v3, v5 Index: llvm/test/CodeGen/AMDGPU/sext-in-reg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll +++ llvm/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -577,7 +577,7 @@ ; GCN: {{buffer|flat|global}}_load_ushort [[VAL0:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_ushort [[VAL1:v[0-9]+]] -; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] +; SI: v_lshl_b32_e32 [[REG:v[0-9]+]], [[VAL0]], [[VAL1]] ; GFX89: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}} Index: llvm/test/CodeGen/AMDGPU/shl.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shl.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -123,8 +123,8 @@ ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_and_b32_e32 v5, s8, v3 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_lshlrev_b32_e32 v3, v3, v4 -; CI-NEXT: v_lshlrev_b32_e32 v2, v5, v2 +; CI-NEXT: v_lshl_b32_e32 v3, v4, v3 +; CI-NEXT: v_lshl_b32_e32 v2, v2, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 @@ -491,10 +491,10 @@ ; CI-NEXT: v_and_b32_e32 v9, s8, v5 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; CI-NEXT: v_lshlrev_b32_e32 v5, v5, v7 -; CI-NEXT: v_lshlrev_b32_e32 v3, v9, v3 -; CI-NEXT: v_lshlrev_b32_e32 v4, v4, v6 -; CI-NEXT: v_lshlrev_b32_e32 v2, v8, v2 +; CI-NEXT: v_lshl_b32_e32 v5, v7, v5 +; CI-NEXT: v_lshl_b32_e32 v3, v3, v9 +; CI-NEXT: v_lshl_b32_e32 v4, v6, v4 +; CI-NEXT: v_lshl_b32_e32 v2, v2, v8 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4