diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2051,8 +2051,12 @@ Register SrcReg = MI.getOperand(1).getReg(); - // First ZEXT the input. - auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); + // First extend the input. + unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ || + MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF + ? TargetOpcode::G_ANYEXT + : TargetOpcode::G_ZEXT; + auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg}); LLT CurTy = MRI.getType(SrcReg); unsigned NewOpc = MI.getOpcode(); if (NewOpc == TargetOpcode::G_CTTZ) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz-zero-undef.mir @@ -10,9 +10,9 @@ liveins: $w0 ; CHECK-LABEL: name: s8 ; CHECK: liveins: $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[C1]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]] ; CHECK: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]] ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) @@ -34,9 +34,9 @@ liveins: $w0 ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[C1]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]] ; CHECK: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]] ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cttz.mir @@ -10,9 +10,9 @@ liveins: $w0 ; CHECK-LABEL: name: s8 ; CHECK: liveins: $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[C1]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]] ; CHECK: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]] ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) @@ -34,9 +34,9 @@ liveins: $w0 ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[C1]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[C]] ; CHECK: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[OR]] ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[BITREVERSE]](s32) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -74,14 +74,13 @@ liveins: $vgpr0 ; CHECK-LABEL: name: cttz_zero_undef_s16_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; CHECK: $vgpr0 = COPY [[AND1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTTZ_ZERO_UNDEF %1 @@ -136,21 +135,19 @@ ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -167,14 +164,13 @@ ; CHECK-LABEL: name: cttz_zero_undef_s7_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; CHECK: $vgpr0 = COPY [[AND1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTTZ_ZERO_UNDEF %1 @@ -191,16 +187,14 @@ ; CHECK-LABEL: name: cttz_zero_undef_s33_s33 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s64) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY1]](s64) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C1]](s64) - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -82,16 +82,15 @@ liveins: $vgpr0 ; CHECK-LABEL: name: cttz_s16_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[C]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; CHECK: $vgpr0 = COPY [[AND1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_CTTZ %1 @@ -152,24 +151,22 @@ ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -186,16 +183,15 @@ ; CHECK-LABEL: name: cttz_s7_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[C]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; CHECK: $vgpr0 = COPY [[AND1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s7) = G_TRUNC %0 %2:_(s7) = G_CTTZ %1 @@ -212,18 +208,16 @@ ; CHECK-LABEL: name: cttz_s33_s33 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[C]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64) - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C1]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll --- a/llvm/test/CodeGen/AMDGPU/cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz.ll @@ -492,11 +492,10 @@ ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x100 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3] ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0xff, v1, v2 +; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v1 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 ; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm @@ -1385,15 +1384,14 @@ ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s3 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v1, v0 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v3, vcc_lo +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s2, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo -; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s2 +; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid @@ -1600,7 +1598,7 @@ ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v2, v3, vcc_lo ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: v_and_or_b32 v1, v0, s2, 0x80 +; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x80, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -365,8 +365,7 @@ ; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: s_and_b32 s0, s4, 0xff -; GFX9-GISEL-NEXT: s_ff1_i32_b32 s0, s0 +; GFX9-GISEL-NEXT: s_ff1_i32_b32 s0, s4 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3] ; GFX9-GISEL-NEXT: s_endpgm @@ -437,8 +436,7 @@ ; GFX9-GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: s_and_b32 s0, s4, 0xffff -; GFX9-GISEL-NEXT: s_ff1_i32_b32 s0, s0 +; GFX9-GISEL-NEXT: s_ff1_i32_b32 s0, s4 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-GISEL-NEXT: global_store_short v1, v0, s[2:3] ; GFX9-GISEL-NEXT: s_endpgm