diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1048,25 +1048,6 @@ if (!Src0->isImm() && !Src1->isImm()) return false; - if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 || - MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 || - MI->getOpcode() == AMDGPU::V_AND_OR_B32) { - if (Src0->isImm() && Src0->getImm() == 0) { - // v_lshl_or_b32 0, X, Y -> copy Y - // v_lshl_or_b32 0, X, K -> v_mov_b32 K - // v_lshl_add_b32 0, X, Y -> copy Y - // v_lshl_add_b32 0, X, K -> v_mov_b32 K - // v_and_or_b32 0, X, Y -> copy Y - // v_and_or_b32 0, X, K -> v_mov_b32 K - bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg(); - MI->RemoveOperand(Src1Idx); - MI->RemoveOperand(Src0Idx); - - MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32)); - return true; - } - } - // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2046,6 +2046,16 @@ (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; +def : GCNPat < + (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))), + (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) +>; + +def : GCNPat < + (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))), + (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) +>; + def : GCNPat < (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))), (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -422,64 +422,6 @@ S_ENDPGM 0, implicit $vcc ... ---- -# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}} -# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec -# GCN-NEXT: S_ENDPGM 0, implicit %2 - -name: constant_fold_lshl_or_reg0_immreg_reg -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}} -# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec -# GCN-NEXT: S_ENDPGM 0, implicit %2 - -name: constant_fold_lshl_or_reg0_immreg_imm -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}} -# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec -# GCN-NEXT: S_ENDPGM 0, implicit %3 - -name: constant_fold_lshl_or_reg0_immreg_immreg -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec - S_ENDPGM 0, implicit %3 - -... - --- # GCN-LABEL: name: s_fold_andn2_imm_regimm_32{{$}} # GCN: [[VAL:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1233024, implicit $exec @@ -820,152 +762,6 @@ ... ---- -# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_reg{{$}} -# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_lshl_add_reg0_immreg_reg -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, $vgpr0, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_imm{{$}} -# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_lshl_add_reg0_immreg_imm -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_LSHL_ADD_U32 %0, %1, 10, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_lshl_add_reg0_immreg_immreg{{$}} -# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_lshl_add_reg0_immreg_immreg -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - %3:vgpr_32 = V_LSHL_ADD_U32 %0, %1, %2, implicit $exec - S_ENDPGM 0, implicit %3 - -... - ---- -# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_reg{{$}} -# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_and_or_reg0_immreg_reg -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_AND_OR_B32 %0, %1, $vgpr0, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_imm{{$}} -# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_and_or_reg0_immreg_imm -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_AND_OR_B32 %0, %1, 10, implicit $exec - S_ENDPGM 0, implicit %2 - -... - ---- - -# GCN-LABEL: name: constant_fold_and_or_reg0_immreg_immreg{{$}} -# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec -# GCN-NEXT: S_ENDPGM - -name: constant_fold_and_or_reg0_immreg_immreg -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true - -body: | - bb.0: - - %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec - %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec - %3:vgpr_32 = V_AND_OR_B32 %0, %1, %2, implicit $exec - S_ENDPGM 0, implicit %3 - -... - # This used to incorrectly interpret V_MOV_B32_sdwa as being a move # immediate, and interpreting the src0_modifiers field as a # materialized immediate. diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -718,11 +718,10 @@ ; GCN-LABEL: {{^}}v_test_canonicalize_reg_undef_reg_reg_v4f16: ; GFX9: s_waitcnt -; GFX9-NEXT: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff -; GFX9-NEXT: v_and_b32_e32 v1, [[MASK]], v1 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX9-NEXT: v_and_b32_e32 v0, [[MASK]], v0 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 ; GFX9-NEXT: s_setpc_b64