Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -114,6 +114,12 @@ void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0, unsigned TruncOpcode = TargetOpcode::G_TRUNC); + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Def by truncating the operand's type to \p NarrowTy, replacing in place + /// and extending back with \p ExtOpcode. + void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, + unsigned ExtOpcode); + /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic /// registers created are appended to Ops, starting at bit 0 of Reg. Index: include/llvm/Target/GenericOpcodes.td =================================================================== --- include/llvm/Target/GenericOpcodes.td +++ include/llvm/Target/GenericOpcodes.td @@ -123,31 +123,31 @@ def G_CTLZ : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } def G_CTLZ_ZERO_UNDEF : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } def G_CTTZ : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } def G_CTTZ_ZERO_UNDEF : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } def G_CTPOP : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins 
type1:$src); let hasSideEffects = 0; } Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -667,6 +667,15 @@ narrowScalarSrc(MI, NarrowTy, 2); return Legalized; } + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case TargetOpcode::G_CTPOP: + if (TypeIdx != 0) + return UnableToLegalize; // TODO: Narrow type index 1 (the source). + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); // NOTE(review): MI's def is rewritten in place with no change-observer notification here; confirm MI is re-legalized. + return Legalized; } } @@ -694,6 +703,15 @@ MO.setReg(DstExt); } +void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, + unsigned OpIdx, unsigned ExtOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); + MO.setReg(DstTrunc); +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MIRBuilder.setInstr(MI); @@ -732,6 +750,11 @@ case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTPOP: { + if (TypeIdx == 0) { + widenScalarDst(MI, WideTy, 0); // NOTE(review): same in-place def rewrite with no change-observer notification here; confirm. + return Legalized; + } + // First ZEXT the input. auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); @@ -1542,7 +1565,7 @@ case TargetOpcode::G_CTLZ: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. 
auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}, {SrcReg}); @@ -1592,7 +1615,7 @@ case TargetOpcode::G_CTTZ: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, @@ -1617,8 +1640,8 @@ TargetOpcode::G_AND, {Ty}, {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, {SrcReg, MIBCstNeg1})}); - if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && - isSupported({TargetOpcode::G_CTLZ, {Ty}})) { + if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && + isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); MIRBuilder.buildInstr( TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -206,12 +206,17 @@ .legalFor({S32}) .scalarize(0); - setAction({G_CTLZ, S32}, Legal); - setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal); - setAction({G_CTTZ, S32}, Legal); - setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal); + // The 64-bit versions produce 32-bit results, but only on the SALU. 
+ getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF, + G_CTTZ, G_CTTZ_ZERO_UNDEF, + G_CTPOP}) + .legalFor({{S32, S32}, {S32, S64}}) + .clampScalar(0, S32, S32) + .clampScalar(1, S32, S64); + // TODO: Scalarize + + + setAction({G_BSWAP, S32}, Legal); - setAction({G_CTPOP, S32}, Legal); getActionDefinitionsBuilder(G_INTTOPTR) .legalIf([](const LegalityQuery &Query) { Index: lib/Target/ARM/ARMLegalizerInfo.cpp =================================================================== --- lib/Target/ARM/ARMLegalizerInfo.cpp +++ lib/Target/ARM/ARMLegalizerInfo.cpp @@ -144,17 +144,21 @@ if (ST.hasV5TOps()) { getActionDefinitionsBuilder(G_CTLZ) - .legalFor({s32}) + .legalFor({{s32, s32}}) + .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) - .lowerFor({s32}) + .lowerFor({{s32, s32}}) + .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); } else { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) - .libcallFor({s32}) + .libcallFor({{s32, s32}}) + .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_CTLZ) - .lowerFor({s32}) + .lowerFor({{s32, s32}}) + .clampScalar(1, s32, s32) .clampScalar(0, s32, s32); } Index: test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -312,19 +312,19 @@ # DEBUG-NEXT: G_SHUFFLE_VECTOR (opcode {{[0-9]+}}): 3 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 1 type index +# DEBUG-NEXT: G_CTTZ (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT: G_CTTZ_ZERO_UNDEF (opcode {{[0-9]+}}): 1 type index +# DEBUG-NEXT: G_CTTZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. 
type index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 1 type index +# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 1 type index +# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined # -# DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 1 type index +# DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. type index coverage check SKIPPED: no rules defined # # DEBUG-NEXT: G_BSWAP (opcode {{[0-9]+}}): 1 type index Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: ctlz_zero_undef_s32_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_zero_undef_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... + +--- +name: ctlz_zero_undef_s32_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_zero_undef_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[CTLZ_ZERO_UNDEF]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... 
+ +--- +name: ctlz_zero_undef_s64_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_zero_undef_s64_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_zero_undef_s16_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_zero_undef_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_CTLZ_ZERO_UNDEF %0 + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: ctlz_zero_undef_s16_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_zero_undef_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[TRUNC]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_CTLZ_ZERO_UNDEF %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: ctlz_s32_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[CTLZ]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTLZ %0 + $vgpr0 = COPY %1 +... + +--- +name: ctlz_s32_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[CTLZ]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTLZ %0 + $vgpr0 = COPY %1 +... + +--- +name: ctlz_s64_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_s64_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CTLZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_s16_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_CTLZ %0 + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: ctlz_s16_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[TRUNC]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_CTLZ %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: ctpop_s32_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctpop_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[CTPOP]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTPOP %0 + $vgpr0 = COPY %1 +... + +--- +name: ctpop_s32_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctpop_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[CTPOP]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTPOP %0 + $vgpr0 = COPY %1 +... 
+ +--- +name: ctpop_s64_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctpop_s64_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTPOP]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CTPOP %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctpop_s16_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctpop_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_CTPOP %0 + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: ctpop_s16_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctpop_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[TRUNC]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_CTPOP %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: cttz_zero_undef_s32_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_zero_undef_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... + +--- +name: cttz_zero_undef_s32_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_zero_undef_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[CTTZ_ZERO_UNDEF]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... + +--- +name: cttz_zero_undef_s64_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_zero_undef_s64_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... 
+ +--- +name: cttz_zero_undef_s16_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_zero_undef_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_CTTZ_ZERO_UNDEF %0 + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: cttz_zero_undef_s16_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_zero_undef_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[TRUNC]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_CTTZ_ZERO_UNDEF %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: cttz_s32_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[CTTZ]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTTZ %0 + $vgpr0 = COPY %1 +... 
+ +--- +name: cttz_s32_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[CTTZ]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTTZ %0 + $vgpr0 = COPY %1 +... + +--- +name: cttz_s64_s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_s64_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_CTTZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: cttz_s16_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_CTTZ %0 + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: cttz_s16_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[TRUNC]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_CTTZ %1 + %3:_(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... 
Index: unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp =================================================================== --- unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -26,8 +26,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({{s64, s64}}); + }); // Build Instr auto MIBCTTZ = B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(64)}, {Copies[0]}); @@ -56,8 +57,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_CTLZ).legalFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTLZ).legalFor({{s64, s64}}); + }); // Build Instr auto MIBCTTZ = B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(64)}, {Copies[0]}); @@ -88,8 +90,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTPOP).legalFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTPOP).legalFor({{s64, s64}}); + }); // Build auto MIBCTTZ = B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(64)}, {Copies[0]}); @@ -117,8 +120,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_CTTZ).legalFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTTZ).legalFor({{s64, s64}}); + }); // Build auto MIBCTTZ = B.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, {LLT::scalar(64)}, {Copies[0]}); @@ -142,8 +146,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).legalFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).legalFor({{s64, s64}}); + }); // Build auto MIBCTLZ = B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(64)}, {Copies[0]}); @@ -171,8 +176,9 
@@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({s64}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({{s64, s64}}); + }); // Build auto MIBCTLZ = B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(64)}, {Copies[0]}); @@ -200,8 +206,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_CTPOP).legalFor({s8}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTPOP).legalFor({{s8, s8}}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)}; @@ -239,8 +246,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_CTLZ).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTLZ).legalFor({{s16, s16}}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)}; @@ -250,7 +258,7 @@ AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - ASSERT_TRUE(Helper.widenScalar(*MIBCTLZ, 0, s16) == + ASSERT_TRUE(Helper.widenScalar(*MIBCTLZ, 1, s16) == LegalizerHelper::LegalizeResult::Legalized); auto CheckStr = R"( @@ -272,8 +280,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).legalFor({{s16, s16}}); + }); // Build // Trunc it to s8. 
LLT s8{LLT::scalar(8)}; @@ -284,7 +293,7 @@ AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - ASSERT_TRUE(Helper.widenScalar(*MIBCTLZ_ZU, 0, s16) == + ASSERT_TRUE(Helper.widenScalar(*MIBCTLZ_ZU, 1, s16) == LegalizerHelper::LegalizeResult::Legalized); auto CheckStr = R"( @@ -306,8 +315,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTPOP).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTPOP).legalFor({{s16, s16}}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)}; @@ -317,7 +327,7 @@ AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - ASSERT_TRUE(Helper.widenScalar(*MIBCTPOP, 0, s16) == + ASSERT_TRUE(Helper.widenScalar(*MIBCTPOP, 1, s16) == LegalizerHelper::LegalizeResult::Legalized); auto CheckStr = R"( @@ -337,8 +347,9 @@ return; // Declare your legalization info - DefineLegalizerInfo( - A, { getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({{s16, s16}}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)}; @@ -349,7 +360,7 @@ AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - ASSERT_TRUE(Helper.widenScalar(*MIBCTTZ_ZERO_UNDEF, 0, s16) == + ASSERT_TRUE(Helper.widenScalar(*MIBCTTZ_ZERO_UNDEF, 1, s16) == LegalizerHelper::LegalizeResult::Legalized); auto CheckStr = R"( @@ -369,8 +380,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_CTTZ).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_CTTZ).legalFor({{s16, s16}}); + }); // Build // Trunc it to s8. 
LLT s8{LLT::scalar(8)}; @@ -380,7 +392,7 @@ AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - ASSERT_TRUE(Helper.widenScalar(*MIBCTTZ, 0, s16) == + ASSERT_TRUE(Helper.widenScalar(*MIBCTTZ, 1, s16) == LegalizerHelper::LegalizeResult::Legalized); auto CheckStr = R"( @@ -401,8 +413,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_ADD).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_ADD).legalFor({s16}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)}; @@ -438,8 +451,9 @@ return; // Declare your legalization info - DefineLegalizerInfo(A, - { getActionDefinitionsBuilder(G_SUB).legalFor({s16}); }); + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_SUB).legalFor({s16}); + }); // Build // Trunc it to s8. LLT s8{LLT::scalar(8)};