Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1619,9 +1619,14 @@ const unsigned Mask = Arg->getMask(); const unsigned Shift = countTrailingZeros(Mask); - auto ShiftAmt = B.buildConstant(S32, Shift); - auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt); - B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift)); + Register AndMaskSrc = LiveIn; + + if (Shift != 0) { + auto ShiftAmt = B.buildConstant(S32, Shift); + AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); + } + + B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); } else B.buildCopy(DstReg, LiveIn); Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -89,7 +89,7 @@ } ; ALL-LABEL: {{^}}test_workitem_id_x_func: -; ALL: v_lshrrev_b32_e32 v2, 0, v2 +; ALL: s_waitcnt ; ALL-NEXT: v_and_b32_e32 v2, 0x3ff, v2 define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x()