Index: llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -164,6 +164,18 @@ assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); unsigned Opcode = State.MIs[InsnID]->getOpcode(); + // Look through virtual-register COPYs so the opcode check below sees the + // real defining instruction; keep the COPY itself if no def is found. + if (Opcode == TargetOpcode::COPY) { + Register CopySrc = State.MIs[InsnID]->getOperand(1).getReg(); + if (!CopySrc.isPhysical()) { + MachineInstr *MI2 = getDefIgnoringCopies(CopySrc, MRI); + if (MI2) { + State.MIs[InsnID] = MI2; + Opcode = MI2->getOpcode(); + } + } + } DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID Index: llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll @@ -98,10 +98,9 @@ ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_movk_i32 s4, 0xffc0 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_add_u16_e32 v1, s4, v0 -; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 64 +; GFX8-NEXT: v_subrev_u16_e32 v1, 64, v0 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, @@ -120,7 +119,7 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 4 -; GFX8-NEXT: v_add_u16_e32 v1, 0xffc0, v0 +; GFX8-NEXT: v_subrev_u16_e32 v1, 64, v0 ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: 
v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -139,10 +138,10 @@ ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0 -; GFX8-NEXT: v_add_u16_e32 v2, 4, v0 -; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 64 +; GFX8-NEXT: v_add_u16_e32 v1, 4, v0 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] %add = add <2 x i16> %a, ret <2 x i16> %add Index: llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll @@ -45,9 +45,8 @@ ; ; GFX10-LABEL: add_shl_vgpr_c: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_add_i32 s2, s2, s3 +; GFX10-NEXT: v_add_lshl_u32 v0, s2, s3, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s2 ; GFX10-NEXT: ; return to shader part epilog %x = add i32 %a, %b %result = shl i32 %x, %c Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir @@ -162,10 +162,8 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] - ; GFX10: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit 
[[V_OR_B32_e64_]] + ; GFX10: [[V_AND_OR_B32_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir @@ -126,10 +126,8 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY3]], [[COPY2]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; GFX10: [[V_XOR3_B32_:%[0-9]+]]:vgpr_32 = V_XOR3_B32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_XOR3_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 @@ -174,10 +172,8 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_XOR_B32_]] - ; GFX10: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY2]], [[COPY3]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; GFX10: [[V_XOR3_B32_:%[0-9]+]]:vgpr_32 = V_XOR3_B32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_XOR3_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll 
=================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll @@ -56,9 +56,8 @@ define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) { ; MIPS32-LABEL: select_with_negation: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: slt $2, $4, $5 -; MIPS32-NEXT: xor $1, $2, $1 +; MIPS32-NEXT: slt $1, $4, $5 +; MIPS32-NEXT: xori $1, $1, 1 ; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: movn $7, $6, $1 ; MIPS32-NEXT: move $2, $7