Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -78,6 +78,9 @@ bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -988,23 +988,29 @@ .clampNumElements(0, V2S64, V16S64) .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)); - if (ST.hasScalarPackInsts()) - BuildVector.legalFor({V2S16, S32}); - - BuildVector - .minScalarSameAs(1, 0) - .legalIf(isRegisterType(0)) - .minScalarOrElt(0, S32); - if (ST.hasScalarPackInsts()) { + BuildVector + // FIXME: Should probably widen s1 vectors straight to s32 + .minScalarOrElt(0, S16) + // Widen source elements and produce a G_BUILD_VECTOR_TRUNC + .minScalar(1, S32); + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) .legalFor({V2S16, S32}) .lower(); + BuildVector.minScalarOrElt(0, S32); } else { + BuildVector.customFor({V2S16, S16}); + BuildVector.minScalarOrElt(0, S32); + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .customFor({V2S16, S32}) .lower(); } + BuildVector.legalIf(isRegisterType(0)); + + // FIXME: Clamp maximum size getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalIf(isRegisterType(0)); @@ -1173,6 +1179,8 @@ return legalizeFDIV(MI, MRI, B); case TargetOpcode::G_ATOMIC_CMPXCHG: return legalizeAtomicCmpXChg(MI, MRI, B); + case TargetOpcode::G_BUILD_VECTOR: + return legalizeBuildVector(MI, MRI, B); default: return false; } @@ -1837,6 +1845,27 @@ return true; } +// Turn an illegal packed v2s16 build vector into bit operations. +// TODO: This should probably be a bitcast action in LegalizerHelper. +bool AMDGPULegalizerInfo::legalizeBuildVector( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + const LLT S32 = LLT::scalar(32); + const LLT V2S16 = LLT::vector(2, 16); + assert(DstTy == V2S16); + + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + assert(MRI.getType(Src0) == LLT::scalar(16)); + + B.setInstr(MI); + auto Merge = B.buildMerge(S32, {Src0, Src1}); + B.buildBitcast(Dst, Merge); + MI.eraseFromParent(); + return true; +} + // Return the use branch instruction, otherwise null if the usage is invalid. static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir @@ -77,15 +77,20 @@ ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY4]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY6]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 %2:_(<2 x s16>) = G_SEXT %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -51,10 +51,14 @@ ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]] ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -1,8 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# XFAIL: * # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- -name: test_build_vector_s_v2s16_s_s16_s_s16 +name: test_build_vector_trunc_s_v2s16_s_s32_s_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -11,7 +12,7 @@ bb.0: liveins: $sgpr0, $sgpr1 - ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_s32 ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 @@ -19,16 +20,12 @@ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - - %2:sgpr(s16) = G_TRUNC %0 - %3:sgpr(s16) = G_TRUNC %1 - - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 S_ENDPGM 0, implicit %4 ... --- -name: test_build_vector_s_pack_lh +name: test_build_vector_trunc_s_pack_lh legalized: true regBankSelected: true tracksRegLiveness: true @@ -37,7 +34,7 @@ bb.0: liveins: $sgpr0, $sgpr1 - ; GFX9-LABEL: name: test_build_vector_s_pack_lh + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 @@ -49,16 +46,13 @@ %2:sgpr(s32) = G_CONSTANT i32 16 %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(s16) = G_TRUNC %0 - %5:sgpr(s16) = G_TRUNC %3 - - %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5 - S_ENDPGM 0, implicit %6 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 + S_ENDPGM 0, implicit %4 ... # There is no s_pack_hl_b32 --- -name: test_build_vector_s_pack_lh_swapped +name: test_build_vector_trunc_s_pack_lh_swapped legalized: true regBankSelected: true tracksRegLiveness: true @@ -67,7 +61,7 @@ bb.0: liveins: $sgpr0, $sgpr1 - ; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh_swapped ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 @@ -81,15 +75,12 @@ %2:sgpr(s32) = G_CONSTANT i32 16 %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(s16) = G_TRUNC %0 - %5:sgpr(s16) = G_TRUNC %3 - - %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4 - S_ENDPGM 0, implicit %6 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0 + S_ENDPGM 0, implicit %4 ... --- -name: test_build_vector_s_pack_hh +name: test_build_vector_trunc_s_pack_hh legalized: true regBankSelected: true tracksRegLiveness: true @@ -98,7 +89,7 @@ bb.0: liveins: $sgpr0, $sgpr1 - ; GFX9-LABEL: name: test_build_vector_s_pack_hh + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 @@ -111,16 +102,13 @@ %3:sgpr(s32) = G_LSHR %0, %2 %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(s16) = G_TRUNC %3 - %6:sgpr(s16) = G_TRUNC %4 - - %7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6 - S_ENDPGM 0, implicit %7 + %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 + S_ENDPGM 0, implicit %5 ... # TODO: Should this use an and instead? --- -name: test_build_vector_s_v2s16_s_s16_s_0_s16 +name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -129,23 +117,20 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32 ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 - - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_CONSTANT i16 0 - - %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 - S_ENDPGM 0, implicit %3 + %1:sgpr(s32) = G_CONSTANT i32 0 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_s_v2s16_s_0_s16_s_s16 +name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -154,22 +139,21 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32 ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_CONSTANT i16 0 - %2:sgpr(s16) = G_TRUNC %0 + %1:sgpr(s32) = G_CONSTANT i32 0 - %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 - S_ENDPGM 0, implicit %3 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 + S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_v_v2s16_v_s16_s_undef_s16 +name: test_build_vector_v_v2s16_v_s32_s_undef_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -178,21 +162,20 @@ bb.0: liveins: $vgpr0 - ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16 + ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s32_s_undef_s32 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: S_ENDPGM 0, implicit [[COPY]] %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_IMPLICIT_DEF - %3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 - S_ENDPGM 0, implicit %3 + %2:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_s_v2s16_s_s16_s_undef_s16 +name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -201,21 +184,20 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_IMPLICIT_DEF - %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 - S_ENDPGM 0, implicit %3 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_s_v2s16_s_undef_s16_s_s16 +name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 legalized: true regBankSelected: true tracksRegLiveness: true @@ -224,16 +206,15 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_IMPLICIT_DEF - %2:sgpr(s16) = G_TRUNC %0 + %1:sgpr(s32) = G_IMPLICIT_DEF - %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 - S_ENDPGM 0, implicit %3 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 + S_ENDPGM 0, implicit %2 ... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -127,13 +127,18 @@ ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_add_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -148,8 +153,12 @@ ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ADD]](s16), [[ADD1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_add_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -164,8 +173,10 @@ ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] ; GFX9: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ADD]](s16), [[ADD1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_ADD %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -500,16 +500,20 @@ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_ashr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -524,8 +528,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC2]](s16) ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_ashr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -554,14 +562,19 @@ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[UV]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[UV1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_ashr_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -573,8 +586,12 @@ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[UV]](s32) ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_ashr_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -586,8 +603,10 @@ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[UV]](s32) ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %2:_(<2 x s16>) = G_ASHR %0, %1 @@ -627,28 +646,35 @@ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[AND2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -679,13 +705,20 @@ ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -756,31 +789,39 @@ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[AND2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[AND3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ASHR7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -808,9 +849,17 @@ ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[ASHR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -334,24 +334,31 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]](s64) ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](s48) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[DEF]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s48) = G_TRUNC %0 %2:_(<3 x s16>) = G_BITCAST %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitreverse.mir @@ -93,13 +93,18 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY1]] ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE]], [[C]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[BITREVERSE1:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY2]] ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITREVERSE1]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_BITREVERSE %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -94,13 +94,18 @@ ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[COPY2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[COPY3]] ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_BSWAP %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir @@ -822,303 +822,3 @@ %4:_(<4 x s128>) = G_BUILD_VECTOR %0, %1, %2, %3 S_NOP 0, implicit %4 ... - ---- -name: build_vector_v2s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; CHECK-LABEL: name: build_vector_v2s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 - S_NOP 0, implicit %4 -... - ---- -name: build_vector_v3s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2 - - ; CHECK-LABEL: name: build_vector_v3s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s16) = G_TRUNC %0 - %4:_(s16) = G_TRUNC %1 - %5:_(s16) = G_TRUNC %2 - %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 - S_NOP 0, implicit %6 -... - ---- -name: build_vector_v4s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - - ; CHECK-LABEL: name: build_vector_v4s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s16) = G_TRUNC %0 - %5:_(s16) = G_TRUNC %1 - %6:_(s16) = G_TRUNC %2 - %7:_(s16) = G_TRUNC %3 - %8:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %6, %7 - S_NOP 0, implicit %8 -... - ---- -name: build_vector_v5s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - - ; CHECK-LABEL: name: build_vector_v5s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; CHECK: S_NOP 0, implicit [[EXTRACT]](<5 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s16) = G_TRUNC %0 - %6:_(s16) = G_TRUNC %1 - %7:_(s16) = G_TRUNC %2 - %8:_(s16) = G_TRUNC %3 - %9:_(s16) = G_TRUNC %4 - %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9 - S_NOP 0, implicit %10 -... - ---- -name: build_vector_v7s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - - ; CHECK-LABEL: name: build_vector_v7s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<8 x s16>), 0 - ; CHECK: S_NOP 0, implicit [[EXTRACT]](<7 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s16) = G_TRUNC %0 - %8:_(s16) = G_TRUNC %1 - %9:_(s16) = G_TRUNC %2 - %10:_(s16) = G_TRUNC %3 - %11:_(s16) = G_TRUNC %4 - %12:_(s16) = G_TRUNC %5 - %13:_(s16) = G_TRUNC %6 - %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13 - S_NOP 0, implicit %14 -... - ---- -name: build_vector_v8s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 - - ; CHECK-LABEL: name: build_vector_v8s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) - ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s16) = G_TRUNC %0 - %9:_(s16) = G_TRUNC %1 - %10:_(s16) = G_TRUNC %2 - %11:_(s16) = G_TRUNC %3 - %12:_(s16) = G_TRUNC %4 - %13:_(s16) = G_TRUNC %5 - %14:_(s16) = G_TRUNC %6 - %15:_(s16) = G_TRUNC %7 - %16:_(<8 x s16>) = G_BUILD_VECTOR %8, %9, %10, %11, %12, %13, %14, %15 - S_NOP 0, implicit %16 -... - ---- -name: build_vector_v16s16 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - - ; CHECK-LABEL: name: build_vector_v16s16 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = COPY $vgpr3 - %4:_(s32) = COPY $vgpr4 - %5:_(s32) = COPY $vgpr5 - %6:_(s32) = COPY $vgpr6 - %7:_(s32) = COPY $vgpr7 - %8:_(s32) = COPY $vgpr8 - %9:_(s32) = COPY $vgpr9 - %10:_(s32) = COPY $vgpr10 - %11:_(s32) = COPY $vgpr11 - %12:_(s32) = COPY $vgpr12 - %13:_(s32) = COPY $vgpr13 - %14:_(s32) = COPY $vgpr14 - %15:_(s32) = COPY $vgpr15 - %16:_(s16) = G_TRUNC %0 - %17:_(s16) = G_TRUNC %1 - %18:_(s16) = G_TRUNC %2 - %19:_(s16) = G_TRUNC %3 - %20:_(s16) = G_TRUNC %4 - %21:_(s16) = G_TRUNC %5 - %22:_(s16) = G_TRUNC %6 - %23:_(s16) = G_TRUNC %7 - %24:_(s16) = G_TRUNC %8 - %25:_(s16) = G_TRUNC %9 - %26:_(s16) = G_TRUNC %10 - %27:_(s16) = G_TRUNC %11 - %28:_(s16) = G_TRUNC %12 - %29:_(s16) = G_TRUNC %13 - %30:_(s16) = G_TRUNC %14 - %31:_(s16) = G_TRUNC %15 - %32:_(<16 x s16>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 - S_NOP 0, implicit %32 -... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir @@ -0,0 +1,551 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX78 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX78 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: build_vector_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX78-LABEL: name: build_vector_v2s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: S_NOP 0, implicit [[BITCAST]](<2 x s16>) + ; GFX9-LABEL: name: build_vector_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 + S_NOP 0, implicit %4 +... + +--- +name: build_vector_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX78-LABEL: name: build_vector_v3s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX78: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; GFX78: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX78: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) + ; GFX9-LABEL: name: build_vector_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 + S_NOP 0, implicit %6 +... + +--- +name: build_vector_v4s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GFX78-LABEL: name: build_vector_v4s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-LABEL: name: build_vector_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s16) = G_TRUNC %0 + %5:_(s16) = G_TRUNC %1 + %6:_(s16) = G_TRUNC %2 + %7:_(s16) = G_TRUNC %3 + %8:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %6, %7 + S_NOP 0, implicit %8 +... + +--- +name: build_vector_v5s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; GFX78-LABEL: name: build_vector_v5s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]] + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]] + ; GFX78: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; GFX78: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; GFX78: S_NOP 0, implicit [[EXTRACT]](<5 x s16>) + ; GFX9-LABEL: name: build_vector_v5s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<5 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s16) = G_TRUNC %0 + %6:_(s16) = G_TRUNC %1 + %7:_(s16) = G_TRUNC %2 + %8:_(s16) = G_TRUNC %3 + %9:_(s16) = G_TRUNC %4 + %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9 + S_NOP 0, implicit %10 +... + +--- +name: build_vector_v7s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + + ; GFX78-LABEL: name: build_vector_v7s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]] + ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; GFX78: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX78: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; GFX78: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX78: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<8 x s16>), 0 + ; GFX78: S_NOP 0, implicit [[EXTRACT]](<7 x s16>) + ; GFX9-LABEL: name: build_vector_v7s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<8 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<7 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s16) = G_TRUNC %0 + %8:_(s16) = G_TRUNC %1 + %9:_(s16) = G_TRUNC %2 + %10:_(s16) = G_TRUNC %3 + %11:_(s16) = G_TRUNC %4 + %12:_(s16) = G_TRUNC %5 + %13:_(s16) = G_TRUNC %6 + %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13 + S_NOP 0, implicit %14 +... + +--- +name: build_vector_v8s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; GFX78-LABEL: name: build_vector_v8s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]] + ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] + ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; GFX78: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX78: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; GFX78: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-LABEL: name: build_vector_v8s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s16) = G_TRUNC %0 + %9:_(s16) = G_TRUNC %1 + %10:_(s16) = G_TRUNC %2 + %11:_(s16) = G_TRUNC %3 + %12:_(s16) = G_TRUNC %4 + %13:_(s16) = G_TRUNC %5 + %14:_(s16) = G_TRUNC %6 + %15:_(s16) = G_TRUNC %7 + %16:_(<8 x s16>) = G_BUILD_VECTOR %8, %9, %10, %11, %12, %13, %14, %15 + S_NOP 0, implicit %16 +... + +--- +name: build_vector_v16s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + + ; GFX78-LABEL: name: build_vector_v16s16 + ; GFX78: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX78: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX78: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX78: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX78: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX78: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX78: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX78: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX78: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX78: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX78: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX78: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX78: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX78: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX78: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX78: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX78: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX78: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX78: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]] + ; GFX78: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX78: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C]] + ; GFX78: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX78: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX78: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX78: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX78: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX78: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]] + ; GFX78: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX78: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]] + ; GFX78: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; GFX78: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX78: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX78: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX78: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C]] + ; GFX78: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX78: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C]] + ; GFX78: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX78: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX78: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX78: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX78: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C]] + ; GFX78: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; GFX78: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C]] + ; GFX78: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; GFX78: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX78: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX78: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) + ; GFX78: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C]] + ; GFX78: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) + ; GFX78: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C]] + ; GFX78: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; GFX78: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GFX78: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX78: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) + ; GFX78: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]] + ; GFX78: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) + ; GFX78: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]] + ; GFX78: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; GFX78: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GFX78: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX78: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) + ; GFX78: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C]] + ; GFX78: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) + ; GFX78: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C]] + ; GFX78: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; GFX78: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; GFX78: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; GFX78: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) + ; GFX78: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]] + ; GFX78: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) + ; GFX78: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]] + ; GFX78: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32) + ; GFX78: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; GFX78: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; GFX78: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX78: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + ; GFX9-LABEL: name: build_vector_v16s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) + ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) + ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32) + ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) + ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) + ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32) + ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) + ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32) + ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) + ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY28]](s32), [[COPY29]](s32) + ; GFX9: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) + ; GFX9: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY30]](s32), [[COPY31]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s32) = COPY $vgpr8 + %9:_(s32) = COPY $vgpr9 + %10:_(s32) = COPY $vgpr10 + %11:_(s32) = COPY $vgpr11 + %12:_(s32) = COPY $vgpr12 + %13:_(s32) = COPY $vgpr13 + %14:_(s32) = COPY $vgpr14 + %15:_(s32) = COPY $vgpr15 + %16:_(s16) = G_TRUNC %0 + %17:_(s16) = G_TRUNC %1 + %18:_(s16) = G_TRUNC %2 + %19:_(s16) = G_TRUNC %3 + %20:_(s16) = G_TRUNC %4 + %21:_(s16) = G_TRUNC %5 + %22:_(s16) = G_TRUNC %6 + %23:_(s16) = G_TRUNC %7 + %24:_(s16) = G_TRUNC %8 + %25:_(s16) = G_TRUNC %9 + %26:_(s16) = G_TRUNC %10 + %27:_(s16) = G_TRUNC %11 + %28:_(s16) = G_TRUNC %12 + %29:_(s16) = G_TRUNC %13 + %30:_(s16) = G_TRUNC %14 + %31:_(s16) = G_TRUNC %15 + %32:_(<16 x s16>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 + S_NOP 0, implicit %32 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -144,15 +144,19 @@ ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]] ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -144,15 +144,19 @@ ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C]] ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_CTLZ %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -141,14 +141,18 @@ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[AND1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_CTPOP %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -141,14 +141,18 @@ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[AND1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -145,15 +145,19 @@ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]] ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]] ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[OR1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_CTTZ %0 $vgpr0 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -516,13 +516,21 @@ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[DEF1]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT2]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) @@ -546,14 +554,26 @@ ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s1) ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s1) ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s1) - ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT4]](s16), [[DEF1]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT3]](s16) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT4]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT1]](<5 x s16>), 0 ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -285,8 +285,12 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fadd_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -301,8 +305,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fadd_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -358,10 +366,17 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fadd_v3s16 @@ -393,10 +408,17 @@ ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fadd_v3s16 @@ -479,9 +501,17 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fadd_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -509,9 +539,17 @@ ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC6]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -185,8 +185,12 @@ ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fcanonicalize_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -196,8 +200,12 @@ ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] @@ -235,10 +243,17 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcanonicalize_v3s16 @@ -258,10 +273,17 @@ ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v3s16 @@ -321,9 +343,17 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fcanonicalize_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -341,9 +371,17 @@ ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir @@ -138,8 +138,8 @@ ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: S_NOP 0, implicit [[TRUNC]](<2 x s1>) %0:_(s32) = G_CONSTANT i32 0 %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 @@ -183,8 +183,8 @@ ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s1>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: S_NOP 0, implicit [[TRUNC]](<2 x s1>) %0:_(s32) = G_CONSTANT i32 0 %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0 @@ -237,9 +237,13 @@ ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s1>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s1>) = G_TRUNC [[EXTRACT]](<3 x s16>) ; GFX9: S_NOP 0, implicit [[TRUNC]](<3 x s1>) %0:_(<3 x s32>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 @@ -299,10 +303,12 @@ ; GFX9: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: S_NOP 0, implicit [[TRUNC]](<4 x s1>) %0:_(p1) = G_IMPLICIT_DEF %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -456,34 +456,44 @@ ; SI-LABEL: name: test_copysign_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; SI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; SI: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR]] - ; SI: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[OR]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST1]] + ; SI: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST]] + ; SI: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] + ; SI: $vgpr0 = COPY [[OR2]](<2 x s16>) ; VI-LABEL: name: test_copysign_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; VI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; VI: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR]] - ; VI: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] - ; VI: $vgpr0 = COPY [[OR]](<2 x s16>) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST1]] + ; VI: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BITCAST]] + ; VI: [[OR2:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] + ; VI: $vgpr0 = COPY [[OR2]](<2 x s16>) ; GFX9-LABEL: name: test_copysign_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR]] + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[BUILD_VECTOR_TRUNC]] ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[AND]], [[AND1]] ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -272,8 +272,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) ; SI: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fcos_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -288,8 +292,12 @@ ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] ; VI: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fcos_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -302,8 +310,10 @@ ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FCOS %0 $vgpr0 = COPY %1 @@ -344,10 +354,17 @@ ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcos_v3s16 @@ -374,10 +391,17 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcos_v3s16 @@ -401,10 +425,13 @@ ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -451,9 +478,17 @@ ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fcos_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -480,9 +515,17 @@ ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcos_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -505,9 +548,13 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FCOS %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -1265,8 +1265,12 @@ ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fdiv_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1291,8 +1295,12 @@ ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1317,8 +1325,10 @@ ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1335,8 +1345,10 @@ ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX10-LABEL: name: test_fdiv_v2s16 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1361,8 +1373,10 @@ ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FDIV %0, %1 @@ -1447,10 +1461,17 @@ ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fdiv_v3s16 @@ -1497,10 +1518,17 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fdiv_v3s16 @@ -1547,10 +1575,13 @@ ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] ; GFX9: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX9: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF4]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 @@ -1585,10 +1616,13 @@ ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16) - ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF4]](s32) + ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-UNSAFE: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX10-LABEL: name: test_fdiv_v3s16 @@ -1635,10 +1669,13 @@ ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX10: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF4]](s16) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF4]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX10: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1736,9 +1773,17 @@ ; SI: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) ; SI: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fdiv_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1786,9 +1831,17 @@ ; VI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] ; VI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fdiv_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1836,9 +1889,13 @@ ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] ; GFX9: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) ; GFX9: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v4s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1870,9 +1927,13 @@ ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] ; GFX9-UNSAFE: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) ; GFX9-UNSAFE: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) - ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL3]](s16) + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fdiv_v4s16 ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1920,9 +1981,13 @@ ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] ; GFX10: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) ; GFX10: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -200,8 +200,12 @@ ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_ffloor_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -211,8 +215,12 @@ ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_ffloor_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -222,8 +230,10 @@ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -257,10 +267,17 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_ffloor_v3s16 @@ -280,10 +297,17 @@ ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_ffloor_v3s16 @@ -303,10 +327,13 @@ ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -344,9 +371,17 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ffloor_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -364,9 +399,17 @@ ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; VI: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -384,9 +427,13 @@ ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; GFX9: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FFLOOR %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -342,8 +342,12 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_fma_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -363,8 +367,12 @@ ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -436,10 +444,17 @@ ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[DEF6:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF6]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; VI-LABEL: name: test_fma_v3s16 @@ -483,10 +498,17 @@ ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI: [[DEF6:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[DEF6]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 @@ -592,9 +614,17 @@ ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fma_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -632,9 +662,17 @@ ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[FMA3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -105,8 +105,12 @@ ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX7-LABEL: name: test_fmad_v2s16_flush ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -126,8 +130,12 @@ ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX7: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC2]], [[TRUNC4]] ; GFX7: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) - ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD1]](s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX10-LABEL: name: test_fmad_v2s16_flush ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -149,8 +157,10 @@ ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -232,9 +242,17 @@ ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX7-LABEL: name: test_fmad_v4s16_flush ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -272,9 +290,17 @@ ; GFX7: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; GFX7: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; GFX7: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) - ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD2]](s16), [[FMAD3]](s16) - ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD1]](s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD2]](s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAD3]](s16) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fmad_v4s16_flush ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -316,9 +342,13 @@ ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -491,8 +521,12 @@ ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX7-LABEL: name: test_fmad_v2s16_denorm ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -510,8 +544,12 @@ ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %10(s16), %11(s16) - ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %10(s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %11(s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC5]] ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] @@ -537,8 +575,10 @@ ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -589,8 +629,12 @@ ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX7-LABEL: name: test_fmad_v2s16_denorm_flags ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -608,8 +652,12 @@ ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %10(s16), %11(s16) - ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %10(s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %11(s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC5]] ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] @@ -635,8 +683,10 @@ ; GFX10: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC4]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -718,9 +768,17 @@ ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX7-LABEL: name: test_fmad_v4s16_denorm ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -754,9 +812,17 @@ ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) - ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) - ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %16(s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %17(s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT %18(s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT %19(s16) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] @@ -806,9 +872,13 @@ ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -892,9 +962,17 @@ ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT14]], [[FPEXT15]] ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX7-LABEL: name: test_fmad_v4s16_denorm_flags ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -928,9 +1006,17 @@ ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) - ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) - ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %16(s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %17(s16) + ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX7: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT %18(s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT %19(s16) + ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX7: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX7: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC11]] @@ -980,9 +1066,13 @@ ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC10]] ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -341,8 +341,12 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -361,8 +365,12 @@ ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -420,13 +428,20 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +478,20 @@ ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -547,9 +569,17 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -585,9 +615,17 @@ ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -341,8 +341,12 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -361,8 +365,12 @@ ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -420,13 +428,20 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +478,20 @@ ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -547,9 +569,17 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -585,9 +615,17 @@ ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -284,8 +284,12 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fmul_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -300,8 +304,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fmul_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -357,10 +365,17 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fmul_v3s16 @@ -392,10 +407,17 @@ ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 @@ -478,9 +500,17 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fmul_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -508,9 +538,17 @@ ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fmul_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir @@ -62,9 +62,17 @@ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV1]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_FPTRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -272,8 +272,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) ; SI: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fsin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -288,8 +292,12 @@ ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] ; VI: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fsin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -302,8 +310,10 @@ ; GFX9: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16) ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FSIN %0 $vgpr0 = COPY %1 @@ -344,10 +354,17 @@ ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsin_v3s16 @@ -374,10 +391,17 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsin_v3s16 @@ -401,10 +425,13 @@ ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -451,9 +478,17 @@ ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -480,9 +515,17 @@ ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -505,9 +548,13 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSIN %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -200,8 +200,12 @@ ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fsqrt_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -211,8 +215,12 @@ ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fsqrt_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -222,8 +230,10 @@ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FSQRT %0 $vgpr0 = COPY %1 @@ -257,10 +267,17 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsqrt_v3s16 @@ -280,10 +297,17 @@ ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsqrt_v3s16 @@ -303,10 +327,13 @@ ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -344,9 +371,17 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -364,9 +399,17 @@ ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -384,9 +427,13 @@ ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSQRT %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -327,8 +327,12 @@ ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fsub_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -344,8 +348,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fsub_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -405,10 +413,17 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fsub_v3s16 @@ -443,10 +458,17 @@ ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fsub_v3s16 @@ -481,10 +503,13 @@ ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -541,9 +566,17 @@ ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsub_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -575,9 +608,17 @@ ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -609,9 +650,13 @@ ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -202,13 +202,11 @@ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]] + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = G_CONSTANT i32 0 @@ -273,16 +271,13 @@ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = G_IMPLICIT_DEF @@ -358,19 +353,15 @@ ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]] ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]] ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]] + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C]] ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(p1) = G_IMPLICIT_DEF @@ -644,13 +635,11 @@ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[UV2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[UV3]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[C]](s32) ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[C]](s32) ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -710,13 +699,11 @@ ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p999), [[UV2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p999), [[UV3]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[C]](s32) ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[C]](s32) ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -429,8 +429,12 @@ ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -452,8 +456,12 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[FCMP2]], [[FCMP3]] ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[AND1]](s1), [[C2]], [[C1]] ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -475,8 +483,10 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[FCMP2]], [[FCMP3]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[AND1]](s1), [[C2]], [[C1]] ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -549,13 +559,20 @@ ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[DEF1]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 ; GFX6: $vgpr0_vgpr1 = COPY [[INSERT1]](<4 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -591,13 +608,20 @@ ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[FCMP4]], [[FCMP5]] ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[AND2]](s1), [[C2]], [[C1]] ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX8: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF1]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 ; GFX8: $vgpr0_vgpr1 = COPY [[INSERT1]](<4 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -633,10 +657,13 @@ ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[FCMP4]], [[FCMP5]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[AND2]](s1), [[C2]], [[C1]] ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF1]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -729,9 +756,17 @@ ; GFX6: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT3]](s16) ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC7]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -771,9 +806,17 @@ ; GFX8: [[AND3:%[0-9]+]]:_(s1) = G_AND [[FCMP6]], [[FCMP7]] ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[AND3]](s1), [[C2]], [[C1]] ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -813,9 +856,13 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s1) = G_AND [[FCMP6]], [[FCMP7]] ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[AND3]](s1), [[C2]], [[C1]] ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_INTRINSIC_ROUND %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -4209,10 +4209,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4253,10 +4255,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4308,8 +4312,8 @@ ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4334,8 +4338,8 @@ ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4541,10 +4545,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4579,10 +4585,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) @@ -4647,10 +4655,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4685,10 +4695,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) @@ -4829,53 +4841,71 @@ ; CI-LABEL: name: test_load_constant_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_constant_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_constant_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v2s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v2s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -4916,8 +4946,13 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_constant_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -4943,8 +4978,13 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_constant_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -4970,8 +5010,10 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v2s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -5001,8 +5043,13 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v2s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -5028,8 +5075,10 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -5130,59 +5179,77 @@ ; CI-LABEL: name: test_load_constant_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5190,39 +5257,48 @@ ; CI-MESA-LABEL: name: test_load_constant_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5269,9 +5345,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5280,18 +5361,18 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5321,9 +5402,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5331,17 +5417,17 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5371,7 +5457,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) @@ -5387,7 +5475,7 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -5425,9 +5513,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5436,18 +5529,18 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5477,7 +5570,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) @@ -5493,7 +5588,7 @@ ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -5580,102 +5675,132 @@ ; CI-LABEL: name: test_load_constant_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) @@ -5717,9 +5842,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5728,9 +5858,9 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5740,11 +5870,15 @@ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -5771,9 +5905,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5781,8 +5920,8 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5791,10 +5930,14 @@ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -5821,7 +5964,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) @@ -5843,8 +5988,10 @@ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v4s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -5875,9 +6022,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1, addrspace 4) @@ -5886,9 +6038,9 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5898,11 +6050,15 @@ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -5929,7 +6085,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1, addrspace 4) @@ -5951,8 +6109,10 @@ ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) @@ -5968,192 +6128,246 @@ ; CI-LABEL: name: test_load_constant_v8s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, align 4, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2, align 8, addrspace 4) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2, align 4, addrspace 4) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_constant_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, align 4, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2, align 8, addrspace 4) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2, align 4, addrspace 4) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_constant_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2, align 8, addrspace 4) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2, align 8, addrspace 4) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2, align 8, addrspace 4) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -4279,10 +4279,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4323,10 +4325,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4378,8 +4382,8 @@ ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4404,8 +4408,8 @@ ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4611,10 +4615,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -4649,10 +4655,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) @@ -4717,10 +4725,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -4755,10 +4765,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) @@ -4919,53 +4931,71 @@ ; CI-LABEL: name: test_load_flat_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_flat_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_flat_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v2s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v2s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -5006,8 +5036,13 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_flat_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -5033,8 +5068,13 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_flat_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -5060,8 +5100,10 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v2s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -5091,8 +5133,13 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v2s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -5118,8 +5165,10 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -5220,59 +5269,77 @@ ; CI-LABEL: name: test_load_flat_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5280,39 +5347,48 @@ ; CI-MESA-LABEL: name: test_load_flat_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5359,9 +5435,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5370,18 +5451,18 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5411,9 +5492,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5421,17 +5507,17 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5461,7 +5547,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) @@ -5477,7 +5565,7 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -5515,9 +5603,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5526,18 +5619,18 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5567,7 +5660,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) @@ -5583,7 +5678,7 @@ ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -5670,102 +5765,132 @@ ; CI-LABEL: name: test_load_flat_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 0) @@ -5807,9 +5932,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5818,9 +5948,9 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5830,11 +5960,15 @@ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -5861,9 +5995,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5871,8 +6010,8 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5881,10 +6020,14 @@ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -5911,7 +6054,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) @@ -5933,8 +6078,10 @@ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v4s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -5965,9 +6112,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1) @@ -5976,9 +6128,9 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5988,11 +6140,15 @@ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6019,7 +6175,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1) @@ -6041,8 +6199,10 @@ ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) @@ -6058,192 +6218,246 @@ ; CI-LABEL: name: test_load_flat_v8s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2, align 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2, align 8) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2, align 4) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2, align 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2, align 8) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2, align 4) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2, align 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2, align 8) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2, align 4) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2, align 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2, align 8) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2, align 4) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2, align 8) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -3933,10 +3933,12 @@ ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -3955,10 +3957,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4051,10 +4055,12 @@ ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4067,8 +4073,8 @@ ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[TRUNC]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4341,10 +4347,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) @@ -4436,10 +4444,12 @@ ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) @@ -4596,13 +4606,19 @@ ; SI-LABEL: name: test_load_global_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v2s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -4610,23 +4626,35 @@ ; CI-MESA-LABEL: name: test_load_global_v2s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_global_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -4634,13 +4662,13 @@ ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) $vgpr0 = COPY %1 @@ -4718,8 +4746,13 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v2s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -4753,8 +4786,13 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_global_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -4780,8 +4818,13 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -4811,8 +4854,10 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) $vgpr0 = COPY %1 @@ -4931,22 +4976,31 @@ ; SI-LABEL: name: test_load_global_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4957,42 +5011,60 @@ ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5003,19 +5075,19 @@ ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5062,9 +5134,14 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5073,18 +5150,18 @@ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5124,9 +5201,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5135,18 +5217,18 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5176,9 +5258,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5186,17 +5273,17 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -5232,7 +5319,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) @@ -5248,7 +5337,7 @@ ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -5343,22 +5432,32 @@ ; SI-LABEL: name: test_load_global_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5367,42 +5466,62 @@ ; CI-MESA-LABEL: name: test_load_global_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5411,22 +5530,22 @@ ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 1) @@ -5468,9 +5587,14 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5479,9 +5603,9 @@ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5491,11 +5615,15 @@ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5530,9 +5658,14 @@ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5541,9 +5674,9 @@ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5553,11 +5686,15 @@ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5584,9 +5721,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1, addrspace 1) @@ -5594,8 +5736,8 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) @@ -5604,10 +5746,14 @@ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5638,7 +5784,9 @@ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1, addrspace 1) @@ -5660,8 +5808,10 @@ ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -5438,10 +5438,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -5515,9 +5517,7 @@ ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 3) @@ -5981,14 +5981,18 @@ ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3) @@ -6008,16 +6012,20 @@ ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<8 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS2]](<16 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6063,53 +6071,77 @@ ; SI-LABEL: name: test_load_local_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_local_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-DS128-LABEL: name: test_load_local_v2s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_local_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_local_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 3) $vgpr0 = COPY %1 @@ -6150,8 +6182,13 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_local_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -6181,8 +6218,13 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-DS128-LABEL: name: test_load_local_v2s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -6212,8 +6254,13 @@ ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-DS128: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_local_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -6239,8 +6286,13 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_local_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -6266,8 +6318,10 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 3) $vgpr0 = COPY %1 @@ -6325,99 +6379,135 @@ ; SI-LABEL: name: test_load_local_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -6464,9 +6554,14 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6475,18 +6570,18 @@ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -6520,9 +6615,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6531,18 +6631,18 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -6576,9 +6676,14 @@ ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6587,18 +6692,18 @@ ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; CI-DS128: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; CI-DS128: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; CI-DS128: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -6628,9 +6733,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6638,17 +6748,17 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 @@ -6678,7 +6788,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) @@ -6694,7 +6806,7 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR]](<2 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 @@ -6779,102 +6891,142 @@ ; SI-LABEL: name: test_load_local_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 3) @@ -6916,9 +7068,14 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6927,9 +7084,9 @@ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -6939,11 +7096,15 @@ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6974,9 +7135,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -6985,9 +7151,9 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -6997,11 +7163,15 @@ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -7032,9 +7202,14 @@ ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -7043,9 +7218,9 @@ ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -7055,11 +7230,15 @@ ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -7086,9 +7265,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1, addrspace 3) @@ -7096,8 +7280,8 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -7106,10 +7290,14 @@ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -7136,7 +7324,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1, addrspace 3) @@ -7158,8 +7348,10 @@ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 3) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4423,10 +4423,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) @@ -4488,9 +4490,7 @@ ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 5) @@ -4873,10 +4873,12 @@ ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1, addrspace 56) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1, addrspace 56) @@ -4888,10 +4890,12 @@ ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1, addrspace 56) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1, addrspace 56) @@ -4903,10 +4907,12 @@ ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1, addrspace 56) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1, addrspace 56) @@ -4918,12 +4924,14 @@ ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1, addrspace 56) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) + ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4965,43 +4973,61 @@ ; SI-LABEL: name: test_load_private_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_private_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_private_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_private_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 5) $vgpr0 = COPY %1 @@ -5042,8 +5068,13 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_private_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -5073,8 +5104,13 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_private_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -5100,8 +5136,13 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_private_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -5127,8 +5168,10 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 5) $vgpr0 = COPY %1 @@ -5143,79 +5186,106 @@ ; SI-LABEL: name: test_load_private_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, align 4, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, align 4, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, align 4, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, align 4, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5236,79 +5306,106 @@ ; SI-LABEL: name: test_load_private_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5368,13 +5465,21 @@ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5418,13 +5523,21 @@ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5462,13 +5575,21 @@ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5506,10 +5627,13 @@ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -5614,78 +5738,108 @@ ; SI-LABEL: name: test_load_private_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 5) @@ -5727,9 +5881,14 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1, addrspace 5) @@ -5738,9 +5897,9 @@ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -5750,11 +5909,15 @@ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5785,9 +5948,14 @@ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1, addrspace 5) @@ -5796,9 +5964,9 @@ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -5808,11 +5976,15 @@ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5839,9 +6011,14 @@ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1, addrspace 5) @@ -5849,8 +6026,8 @@ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) @@ -5859,10 +6036,14 @@ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5889,7 +6070,9 @@ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1, addrspace 5) @@ -5911,8 +6094,10 @@ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 5) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -491,15 +491,19 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_lshr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -514,8 +518,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC2]](s16) ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR2]](s16), [[LSHR3]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_lshr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -544,13 +552,17 @@ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[UV]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[UV1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_lshr_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -562,8 +574,12 @@ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[UV]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[LSHR2]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_lshr_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -575,8 +591,10 @@ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[UV]](s32) ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[LSHR2]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %2:_(<2 x s16>) = G_LSHR %0, %1 @@ -615,26 +633,33 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -665,13 +690,20 @@ ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR6]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -741,28 +773,36 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -790,9 +830,17 @@ ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[LSHR7]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR6]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR7]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -265,13 +265,18 @@ ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[MUL]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY4]], [[COPY5]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[MUL1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[MUL]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[MUL1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_mul_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -286,8 +291,12 @@ ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC2]] ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[MUL]](s16), [[MUL1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[MUL]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[MUL1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_mul_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -302,8 +311,10 @@ ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[TRUNC]], [[TRUNC2]] ; GFX9: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[TRUNC1]], [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[MUL]](s16), [[MUL1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MUL]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[MUL1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_MUL %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -72,15 +72,20 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BUILD_VECTOR]](<2 x s16>), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 ; CHECK: $vgpr0 = COPY [[PHI]](<2 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -144,28 +149,36 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) - ; CHECK: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF3]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 - ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT2]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -226,22 +239,31 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY9]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ADD3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 @@ -943,10 +965,10 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[GEP]](p3), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[PTR_ADD]](p3), %bb.1 ; CHECK: $vgpr0 = COPY [[PHI]](p3) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -991,10 +1013,10 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[GEP]](p5), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[PTR_ADD]](p5), %bb.1 ; CHECK: $vgpr0 = COPY [[PHI]](p5) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -1039,10 +1061,10 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[GEP]](p0), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[PTR_ADD]](p0), %bb.1 ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p0) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -1087,10 +1109,10 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[GEP]](p1), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[PTR_ADD]](p1), %bb.1 ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p1) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -1135,10 +1157,10 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[GEP]](p4), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[PTR_ADD]](p4), %bb.1 ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p4) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -210,7 +210,6 @@ ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[ASHR]], [[ASHR1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -218,9 +217,15 @@ ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[ASHR2]], [[ASHR3]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -236,7 +241,6 @@ ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[ASHR]], [[ASHR1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -244,9 +248,15 @@ ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[ASHR2]], [[ASHR3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -262,7 +272,6 @@ ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[ASHR]], [[ASHR1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -270,9 +279,10 @@ ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[ASHR2]], [[ASHR3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SDIV %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -811,10 +811,11 @@ ; GFX9-LABEL: name: test_sext_inreg_v2s16_1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[C]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9: $vgpr0 = COPY [[ASHR]](<2 x s16>) ; GFX8-LABEL: name: test_sext_inreg_v2s16_1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -828,8 +829,12 @@ ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX6-LABEL: name: test_sext_inreg_v2s16_1 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -846,15 +851,20 @@ ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY4]](s32) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY6]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_SEXT_INREG %0, 1 $vgpr0 = COPY %1 @@ -870,10 +880,13 @@ ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[DEF]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[DEF]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 @@ -884,9 +897,9 @@ ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s16) = COPY [[C]](s16) + ; GFX9: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C]](s16) ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[COPY1]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[COPY3]](s16) ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF4]](<4 x s16>), 0 ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -906,9 +919,9 @@ ; GFX9: [[DEF9:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF9]], [[EXTRACT1]](<3 x s16>), 0 ; GFX9: [[EXTRACT10:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT9]](<4 x s16>), 0 - ; GFX9: [[COPY2:%[0-9]+]]:_(s16) = COPY [[C]](s16) + ; GFX9: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C]](s16) ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT8]], [[EXTRACT10]](<2 x s16>) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT9]], [[COPY2]](s16) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT9]], [[COPY4]](s16) ; GFX9: [[DEF10:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT11:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF10]](<4 x s16>), 0 ; GFX9: [[DEF11:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -942,10 +955,17 @@ ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C]](s16) - ; GFX8: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[DEF1]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT1]](<3 x s16>), 0 ; GFX8: $vgpr0_vgpr1 = COPY [[INSERT1]](<4 x s16>) @@ -974,23 +994,31 @@ ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY6]](s32) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY8]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[COPY10]](s32) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) - ; GFX6: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX6: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT1]](<3 x s16>), 0 ; GFX6: $vgpr0_vgpr1 = COPY [[INSERT1]](<4 x s16>) @@ -1109,16 +1137,23 @@ ; GFX9-LABEL: name: test_sext_inreg_v4s16_1 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: test_sext_inreg_v4s16_1 @@ -1142,9 +1177,17 @@ ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C]](s16) ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C]](s16) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[ASHR3]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) + ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX6-LABEL: name: test_sext_inreg_v4s16_1 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -1171,28 +1214,37 @@ ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY8]](s32) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY10]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C]](s32) ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[COPY12]](s32) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C]](s32) ; GFX6: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; GFX6: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[COPY14]](s32) - ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C2]] + ; GFX6: [[COPY17:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY18:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]] + ; GFX6: [[COPY19:%[0-9]+]]:_(s32) = COPY [[ASHR7]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_SEXT_INREG %0, 1 @@ -1207,20 +1259,31 @@ ; GFX9-LABEL: name: test_sext_inreg_v6s16_1 ; GFX9: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; GFX9: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<6 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR4]](<2 x s16>) - ; GFX9: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) ; GFX9: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 @@ -1253,10 +1316,22 @@ ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C]](s16) ; GFX8: [[ASHR4:%[0-9]+]]:_(s16) = G_ASHR [[SHL4]], [[C]](s16) ; GFX8: [[ASHR5:%[0-9]+]]:_(s16) = G_ASHR [[SHL5]], [[C]](s16) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) - ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[ASHR3]](s16) - ; GFX8: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR4]](s16), [[ASHR5]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; GFX8: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR3]](s16) + ; GFX8: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR4]](s16) + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR5]](s16) + ; GFX8: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C1]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF @@ -1292,41 +1367,54 @@ ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C]](s32) ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY11]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY13]](s32) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; GFX6: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY16:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL8]], [[C]](s32) ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[COPY15]](s32) - ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) ; GFX6: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY18:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C]](s32) ; GFX6: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL9]], [[C]](s32) ; GFX6: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[COPY17]](s32) - ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) ; GFX6: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY20:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) ; GFX6: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) ; GFX6: [[ASHR8:%[0-9]+]]:_(s32) = G_ASHR [[SHL10]], [[C]](s32) ; GFX6: [[ASHR9:%[0-9]+]]:_(s32) = G_ASHR [[ASHR8]], [[COPY19]](s32) - ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR9]](s32) ; GFX6: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; GFX6: [[COPY22:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) ; GFX6: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY22]], [[C]](s32) ; GFX6: [[ASHR10:%[0-9]+]]:_(s32) = G_ASHR [[SHL11]], [[C]](s32) ; GFX6: [[ASHR11:%[0-9]+]]:_(s32) = G_ASHR [[ASHR10]], [[COPY21]](s32) - ; GFX6: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR11]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX6: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY23:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C2]] + ; GFX6: [[COPY24:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C2]] + ; GFX6: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL12]] + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY25:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C2]] + ; GFX6: [[COPY26:%[0-9]+]]:_(s32) = COPY [[ASHR7]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C2]] + ; GFX6: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL13]] + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[COPY27:%[0-9]+]]:_(s32) = COPY [[ASHR9]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C2]] + ; GFX6: [[COPY28:%[0-9]+]]:_(s32) = COPY [[ASHR11]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C2]] + ; GFX6: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL14]] + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<6 x s16>) = G_SEXT_INREG %0, 1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -481,14 +481,18 @@ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_shl_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -503,8 +507,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_shl_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -531,12 +539,17 @@ ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[UV]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[UV1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL2]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_shl_v2s16_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -548,8 +561,12 @@ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[UV]](s32) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_shl_v2s16_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -561,8 +578,10 @@ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[UV]](s32) ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %2:_(<2 x s16>) = G_SHL %0, %1 @@ -600,24 +619,31 @@ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -648,13 +674,20 @@ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[DEF2]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -723,25 +756,33 @@ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[AND3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -769,9 +810,17 @@ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[SHL3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SHL3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -14,9 +14,12 @@ ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -43,10 +46,15 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -72,11 +80,16 @@ ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -103,13 +116,18 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -138,11 +156,16 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -169,11 +192,17 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -202,12 +231,17 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -234,11 +268,17 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -265,10 +305,15 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -294,11 +339,16 @@ ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -325,13 +375,18 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -360,11 +415,16 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -390,13 +450,17 @@ ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -425,10 +489,14 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_undef ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -457,12 +525,17 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -489,21 +562,27 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 2) @@ -524,11 +603,17 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -557,11 +642,16 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_0 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -569,11 +659,11 @@ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 0) @@ -594,25 +684,30 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 3) @@ -635,11 +730,16 @@ ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -647,11 +747,11 @@ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 2) @@ -672,25 +772,30 @@ ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 1) @@ -731,7 +836,6 @@ ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; GFX8: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT2]](s32) ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) @@ -750,14 +854,24 @@ ; GFX8: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; GFX8: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR3]](s32), [[ASHR4]](s32), [[ASHR5]](s32) ; GFX8: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT3]](s32) ; GFX8: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT4]](s32) ; GFX8: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT5]](s32) - ; GFX8: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; GFX8: [[COPY9:%[0-9]+]]:_(s32) = COPY [[EXTRACT3]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; GFX8: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT4]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX8: [[COPY11:%[0-9]+]]:_(s32) = COPY [[EXTRACT5]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX8: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -784,7 +898,6 @@ ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32) ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT2]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) @@ -803,14 +916,15 @@ ; GFX9: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR3]](s32), [[ASHR4]](s32), [[ASHR5]](s32) ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT3]](s32) ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT4]](s32) ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT5]](s32) - ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[EXTRACT3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT4]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[EXTRACT5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -835,16 +949,23 @@ ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 ; GFX8: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) @@ -866,16 +987,23 @@ ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 ; GFX8: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 48 - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 48 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 3) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -284,7 +284,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -292,9 +291,15 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -309,8 +314,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -355,7 +364,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -363,7 +371,6 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -371,11 +378,21 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[ASHR4]], [[ASHR5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smax_v3s16 @@ -407,10 +424,17 @@ ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smax_v3s16 @@ -462,7 +486,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -470,7 +493,6 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -478,7 +500,6 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[ASHR4]], [[ASHR5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) @@ -486,10 +507,22 @@ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[ASHR6]], [[ASHR7]] - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SMAX3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -517,9 +550,17 @@ ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[SMAX3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -284,7 +284,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -292,9 +291,15 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -309,8 +314,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -355,7 +364,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -363,7 +371,6 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -371,11 +378,21 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[ASHR4]], [[ASHR5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smin_v3s16 @@ -407,10 +424,17 @@ ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smin_v3s16 @@ -462,7 +486,6 @@ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -470,7 +493,6 @@ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -478,7 +500,6 @@ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[ASHR4]], [[ASHR5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL6]], [[C]](s32) @@ -486,10 +507,22 @@ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[C]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[ASHR6]], [[ASHR7]] - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SMIN3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -517,9 +550,17 @@ ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[SMIN3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -216,7 +216,6 @@ ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[ASHR]], [[ASHR1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -224,9 +223,15 @@ ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[ASHR2]], [[ASHR3]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -242,7 +247,6 @@ ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[ASHR]], [[ASHR1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -250,9 +254,15 @@ ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[ASHR2]], [[ASHR3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -268,7 +278,6 @@ ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[ASHR]], [[ASHR1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -276,9 +285,10 @@ ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[ASHR2]], [[ASHR3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SREM %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -127,13 +127,18 @@ ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY4]], [[COPY5]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_sub_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -148,8 +153,12 @@ ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SUB]](s16), [[SUB1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_sub_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -164,8 +173,10 @@ ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] ; GFX9: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SUB]](s16), [[SUB1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SUB %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -206,15 +206,19 @@ ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX6: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UDIV1]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_udiv_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -229,15 +233,19 @@ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV]](s32) ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX8: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UDIV1]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_udiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -252,15 +260,15 @@ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UDIV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UDIV1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UDIV %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -269,15 +269,19 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -292,8 +296,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -337,23 +345,30 @@ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umax_v3s16 @@ -385,10 +400,17 @@ ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umax_v3s16 @@ -439,28 +461,36 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UMAX3]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -488,9 +518,17 @@ ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[UMAX3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -269,15 +269,19 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -292,8 +296,12 @@ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -337,23 +345,30 @@ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32) - ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMIN2]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umin_v3s16 @@ -385,10 +400,17 @@ ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[DEF4]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umin_v3s16 @@ -439,28 +461,36 @@ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UMIN2]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UMIN3]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -488,9 +518,17 @@ ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[UMIN3]](s16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -206,15 +206,19 @@ ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UREM]](s32) ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX6: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UREM1]](s32) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_urem_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -229,15 +233,19 @@ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UREM]](s32) ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX8: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UREM1]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_urem_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -252,15 +260,15 @@ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UREM]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UREM1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UREM %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.v2s16.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.v2s16.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: build_vector_trunc_v2s16_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.v2s16.mir +++ /dev/null @@ -1,99 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s - ---- -name: build_vector_v2s16_s32_ss -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: build_vector_v2s16_s32_ss - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 -... - ---- -name: build_vector_v2s16_s32_sv -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - - ; CHECK-LABEL: name: build_vector_v2s16_s32_sv - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 -... - ---- -name: build_vector_v2s16_s32_vs -legalized: true - -body: | - bb.0: - liveins: $vgpr0, $sgpr0 - ; CHECK-LABEL: name: build_vector_v2s16_s32_vs - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 -... - ---- -name: build_vector_v2s16_s32_vv -legalized: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - ; CHECK-LABEL: name: build_vector_v2s16_s32_vv - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %0 - %3:_(s16) = G_TRUNC %1 - %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 -...