Index: llvm/lib/Target/AMDGPU/AMDGPUCombine.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -37,11 +37,17 @@
          [{ return matchCvtF32UByteN(*${cvt_f32_ubyteN}, MRI, *MF, ${matchinfo}); }]),
   (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
 
+def add_with_zero: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_PTR_ADD):$root,
+   [{ return matchAddZero(*${root}, MRI, *MF, Helper); }]),
+  (apply [{ applyAddZero(*${root}); }])>;
+
 // Combines which should only apply on SI/VI
 def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
-  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
+  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, add_with_zero]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
 }
 
Index: llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -28,6 +28,25 @@
 using namespace llvm;
 using namespace MIPatternMatch;
 
+static bool matchAddZero(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineFunction &MF, CombinerHelper &Helper) {
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(DstReg);
+  const DataLayout &DL = MF.getDataLayout();
+
+  if (Ty.isPointer() && DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+    return false;
+
+  auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+  return ConstVal && *ConstVal == 0;
+}
+
+static void applyAddZero(MachineInstr &MI) {
+  MachineIRBuilder B(MI);
+  B.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
+  MI.eraseFromParent();
+}
+
 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AMDGPUGenPreLegalizeGICombiner.inc"
 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-add-nullptr.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: add_nullptr_sgpr
+tracksRegLiveness: true
+body: |
+  bb.0.:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: add_nullptr_sgpr
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SHL]](s32)
+    ; CHECK: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = COPY $sgpr0
+    %10:_(s32) = G_CONSTANT i32 3
+    %5:_(s32) = G_SHL %0, %10(s32)
+    %3:_(p3) = G_CONSTANT i32 0
+    %6:_(p3) = G_PTR_ADD %3, %5(s32)
+    %8:_(s32) = G_PTRTOINT %6(p3)
+    %9:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+    $sgpr0 = COPY %9(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+
+...
+
+---
+name: add_nullptr_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0.:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: add_nullptr_vgpr
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SHL]](s32)
+    ; CHECK: $sgpr0 = COPY [[INT]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = COPY $vgpr0
+    %10:_(s32) = G_CONSTANT i32 3
+    %5:_(s32) = G_SHL %0, %10(s32)
+    %3:_(p3) = G_CONSTANT i32 0
+    %6:_(p3) = G_PTR_ADD %3, %5(s32)
+    %8:_(s32) = G_PTRTOINT %6(p3)
+    %9:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+    $sgpr0 = COPY %9(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+
+...