Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -221,6 +221,7 @@
   LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
+  LegalizeResult lowerBitcast(MachineInstr &MI);
   LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
 
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1935,6 +1935,39 @@
   }
 }
 
+static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
+                             MachineIRBuilder &B, Register Src, LLT Ty) {
+  auto Unmerge = B.buildUnmerge(Ty, Src);
+  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+    Pieces.push_back(Unmerge.getReg(I));
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  if (SrcTy.isVector() && !DstTy.isVector()) {
+    SmallVector<Register, 8> SrcRegs;
+    getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcTy.getElementType());
+    MIRBuilder.buildMerge(Dst, SrcRegs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  if (DstTy.isVector() && !SrcTy.isVector()) {
+    SmallVector<Register, 8> SrcRegs;
+    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
+    MIRBuilder.buildMerge(Dst, SrcRegs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   using namespace TargetOpcode;
@@ -1943,6 +1976,8 @@
   switch(MI.getOpcode()) {
   default:
     return UnableToLegalize;
+  case TargetOpcode::G_BITCAST:
+    return lowerBitcast(MI);
   case TargetOpcode::G_SREM:
   case TargetOpcode::G_UREM: {
     Register QuotReg = MRI.createGenericVirtualRegister(Ty);
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -308,7 +308,9 @@
     // Don't worry about the size constraint.
     .legalIf(all(isRegisterType(0), isRegisterType(1)))
     // FIXME: Testing hack
-    .legalForCartesianProduct({S16, LLT::vector(2, 8), });
+    .legalForCartesianProduct({S16, LLT::vector(2, 8), })
+    .lower();
+
 
   getActionDefinitionsBuilder(G_FCONSTANT)
     .legalFor({S32, S64, S16})
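As a hand-written MIR sketch of the expansion above (invented virtual-register names, not update_mir_test_checks.py output): a scalar-to-vector G_BITCAST is unmerged into pieces of the destination's element type and rebuilt with buildMerge, which produces a G_BUILD_VECTOR for a vector destination. So

    %1:_(<3 x s8>) = G_BITCAST %0:_(s24)

lowers to roughly

    %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0(s24)
    %1:_(<3 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %4(s8)

after which the s8 pieces are legalized further by the target, which is where the widening to s16/s32 in the AMDGPU tests below comes from.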
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
 
 ---
 name: test_bitcast_s32_to_v2s16
@@ -282,3 +282,154 @@
     %1:_(<8 x s64>) = G_BITCAST %0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
 ...
+
+---
+name: test_bitcast_s24_to_v3s8
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_bitcast_s24_to_v3s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[C]](s64)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s48)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[TRUNC1]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[SHL]](s64)
+    ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[COPY1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[C2]](s64)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC2]](s48)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[OR]](s64)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[TRUNC3]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[SHL1]](s64)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY2]], [[COPY3]]
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s48) = G_TRUNC [[OR1]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC4]](s48)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s24) = G_TRUNC %0
+    %2:_(<3 x s8>) = G_BITCAST %1
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
+...
+
+---
+name: test_bitcast_s48_to_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_bitcast_s48_to_v3s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](s48)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[DEF]](s16)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s48) = G_TRUNC %0
+    %2:_(<3 x s16>) = G_BITCAST %1
+    %3:_(<3 x s32>) = G_ANYEXT %2
+    $vgpr0_vgpr1_vgpr2 = COPY %3
+...
+
+---
+name: test_bitcast_v3s8_to_s24
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_bitcast_v3s8_to_s24
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8)
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]]
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[UV2]](s8)
+    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND]], [[C]](s16)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[ZEXT2]], [[SHL1]]
+    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s8>) = G_TRUNC %0
+    %2:_(s24) = G_BITCAST %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: test_bitcast_v3s16_to_s48
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_bitcast_v3s16_to_s48
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0
+    ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s16>) = G_TRUNC %0
+    %2:_(s48) = G_BITCAST %1
+    %3:_(s64) = G_ANYEXT %2
+    $vgpr0_vgpr1 = COPY %3
+...
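The vector-to-scalar direction is symmetric: the source is unmerged into its own element type and the pieces are merged into the scalar destination, which buildMerge emits as G_MERGE_VALUES. A sketch in the same hand-written form (invented register names, before any further legalization):

    %1:_(s48) = G_BITCAST %0:_(<3 x s16>)

lowers to roughly

    %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0(<3 x s16>)
    %1:_(s48) = G_MERGE_VALUES %2(s16), %3(s16), %4(s16)

Bitcasts where source and destination are both vectors, or both scalars, still return UnableToLegalize from lowerBitcast.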