Index: include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -193,31 +193,27 @@
   static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                  LLT OpTy, LLT DestTy) {
-    if (OpTy.isVector() && DestTy.isVector())
-      return MergeOp == TargetOpcode::G_CONCAT_VECTORS;
-
-    if (OpTy.isVector() && !DestTy.isVector()) {
-      if (MergeOp == TargetOpcode::G_BUILD_VECTOR)
+    // Check if we found a definition that is like G_MERGE_VALUES.
+    switch (MergeOp) {
+    default:
+      return false;
+    case TargetOpcode::G_BUILD_VECTOR:
+    case TargetOpcode::G_MERGE_VALUES:
+      return true;
+    case TargetOpcode::G_CONCAT_VECTORS: {
+      if (ConvertOp == 0)
         return true;
 
-      if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) {
-        if (ConvertOp == 0)
-          return true;
-
-        const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
-
-        // Don't handle scalarization with a cast that isn't in the same
-        // direction as the vector cast. This could be handled, but it would
-        // require more intermediate unmerges.
-        if (ConvertOp == TargetOpcode::G_TRUNC)
-          return DestTy.getSizeInBits() <= OpEltSize;
-        return DestTy.getSizeInBits() >= OpEltSize;
-      }
+      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
 
-      return false;
+      // Don't handle scalarization with a cast that isn't in the same
+      // direction as the vector cast. This could be handled, but it would
+      // require more intermediate unmerges.
+      if (ConvertOp == TargetOpcode::G_TRUNC)
+        return DestTy.getSizeInBits() <= OpEltSize;
+      return DestTy.getSizeInBits() >= OpEltSize;
+    }
     }
-
-    return MergeOp == TargetOpcode::G_MERGE_VALUES;
   }
 
   bool tryCombineMerges(MachineInstr &MI,
@@ -309,6 +305,10 @@
     } else {
       LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+      if (!ConvertOp && DestTy != MergeSrcTy)
+        ConvertOp = TargetOpcode::G_BITCAST;
+
       if (ConvertOp) {
         Builder.setInstr(MI);
@@ -321,10 +321,10 @@
         markInstAndDefDead(MI, *MergeI, DeadInsts);
         return true;
       }
-      // FIXME: is a COPY appropriate if the types mismatch? We know both
-      // registers are allocatable by now.
-      if (DestTy != MergeSrcTy)
-        return false;
+
+      assert(DestTy == MergeSrcTy &&
+             "Bitcast and the other kinds of conversions should "
+             "have happened earlier");
 
       for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
         MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
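
For review purposes, the decision implemented by the rewritten canFoldMergeOpcode, together with the new G_BITCAST fallback in tryCombineMerges, can be summarized by the stand-alone sketch below. The Opcode enum and SimpleType struct are made-up stand-ins for TargetOpcode constants and llvm::LLT (they are not part of the patch or of the LLVM API); only the control flow is meant to mirror the patched code.

// Stand-alone illustration only; not LLVM code.
enum Opcode : unsigned {
  NoConvert = 0, // plays the role of ConvertOp == 0
  BuildVector,
  MergeValues,
  ConcatVectors,
  Trunc,
  Bitcast
};

struct SimpleType {
  bool IsVector;
  unsigned SizeInBits;
  unsigned EltSizeInBits; // element size for vectors, SizeInBits for scalars
};

static bool sameType(SimpleType A, SimpleType B) {
  return A.IsVector == B.IsVector && A.SizeInBits == B.SizeInBits &&
         A.EltSizeInBits == B.EltSizeInBits;
}

// Mirrors the new canFoldMergeOpcode: can an unmerge whose source is defined
// by MergeOp (possibly through a ConvertOp cast) be folded away?
static bool canFoldMergeOpcodeSketch(Opcode MergeOp, Opcode ConvertOp,
                                     SimpleType OpTy, SimpleType DestTy) {
  switch (MergeOp) {
  default:
    return false;
  case BuildVector:
  case MergeValues:
    return true;
  case ConcatVectors:
    if (ConvertOp == NoConvert)
      return true;
    // Only fold when the cast goes in the same direction as the vector cast;
    // the other direction would need extra intermediate unmerges.
    if (ConvertOp == Trunc)
      return DestTy.SizeInBits <= OpTy.EltSizeInBits;
    return DestTy.SizeInBits >= OpTy.EltSizeInBits;
  }
}

// Mirrors the new behaviour of tryCombineMerges for the same-number-of-pieces
// case: a type mismatch with no explicit cast is now treated as a bitcast
// instead of bailing out, so the final register-replacement path only ever
// sees matching types (hence the assert in the last hunk).
static Opcode pickConversionSketch(Opcode ConvertOp, SimpleType DestTy,
                                   SimpleType MergeSrcTy) {
  // E.g. unmerging <4 x s64> into two s128 pieces: DestTy is s128, MergeSrcTy
  // is <2 x s64>; same size but different kind, so a G_BITCAST is inserted.
  if (ConvertOp == NoConvert && !sameType(DestTy, MergeSrcTy))
    return Bitcast;
  return ConvertOp;
}
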
Index: test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple aarch64-apple-ios -stop-after=instruction-select %s -o - | FileCheck %s
+
+; Check that packing the incoming arguments into a big vector type
+; and unpacking them into registers for the call to @bar gets selected
+; as simple copies. I.e., we don't artificially try to keep the big
+; vector (%vec) alive.
+define void @shuffle_to_concat_vector(<2 x i64> %a, <2 x i64> %b) {
+  ; CHECK-LABEL: name: shuffle_to_concat_vector
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $q0, $q1
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK:   $q0 = COPY [[COPY]]
+  ; CHECK:   $q1 = COPY [[COPY1]]
+  ; CHECK:   BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1
+  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK:   RET_ReallyLR
+  %vec = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  call void @bar(<4 x i64> %vec)
+  ret void
+}
+
+declare void @bar(<4 x i64> %vec)
Index: test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir
===================================================================
--- test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir
+++ test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir
@@ -1,15 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
---- |
-  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64--"
-  define void @test_unmerge() {
-  entry:
-    ret void
-  }
-  define void @test_legal_const_ext() { ret void }
-...
+# RUN: llc -O0 -mtriple aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
 name:            test_unmerge
@@ -44,3 +34,60 @@
     %4:_(s32) = G_ANYEXT %3(s1)
     $w0 = COPY %4(s32)
 ...
+
+# Check that the artifact combiner can get rid of the big
+# vector type (4 x s64) by combining the G_UNMERGE_VALUES
+# with the G_CONCAT_VECTORS and turning that into bitcasts.
+---
+name:            concat_vectors_unmerge_to_bitcast
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: concat_vectors_unmerge_to_bitcast
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<2 x s64>)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[COPY1]](<2 x s64>)
+    ; CHECK: $q0 = COPY [[BITCAST]](s128)
+    ; CHECK: $q1 = COPY [[BITCAST1]](s128)
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %3:_(s128), %4:_(s128) = G_UNMERGE_VALUES %2(<4 x s64>)
+    $q0 = COPY %3(s128)
+    $q1 = COPY %4(s128)
+...
+
+# Check that the artifact combiner can get rid of the big
+# vector type (4 x s64) by combining the G_UNMERGE_VALUES
+# with the G_CONCAT_VECTORS and turning that into smaller
+# G_UNMERGE_VALUES of the <2 x s64> sources.
+---
+name:            concat_vectors_unmerge_to_unmerge
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: concat_vectors_unmerge_to_unmerge
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; CHECK: $x0 = COPY [[UV]](s64)
+    ; CHECK: $x1 = COPY [[UV1]](s64)
+    ; CHECK: $x2 = COPY [[UV2]](s64)
+    ; CHECK: $x3 = COPY [[UV3]](s64)
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %2:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %3:_(s64), %4:_(s64), %5:_(s64), %6:_(s64) = G_UNMERGE_VALUES %2(<4 x s64>)
+    $x0 = COPY %3(s64)
+    $x1 = COPY %4(s64)
+    $x2 = COPY %5(s64)
+    $x3 = COPY %6(s64)
+...
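
As a side note on the two MIR tests above, which of the two combiner outputs you get follows from how many unmerge results each G_CONCAT_VECTORS source has to cover. The helper below is a stand-alone illustration of that arithmetic; the function name and the numbers in main are made up for this note and are not part of the patch.

// Stand-alone illustration only; not LLVM code.
#include <cassert>
#include <cstdio>

// For an unmerge producing NumDefs results of DefSize bits each, fed by a
// concat of NumSrcs sources of SrcSize bits each: how many results does each
// source cover? One result per source means a plain bitcast per source;
// more than one means each source gets its own smaller unmerge.
static unsigned defsPerConcatSource(unsigned NumDefs, unsigned DefSize,
                                    unsigned NumSrcs, unsigned SrcSize) {
  assert(NumDefs * DefSize == NumSrcs * SrcSize && "total sizes must match");
  assert(SrcSize % DefSize == 0 && "each source must split evenly");
  return SrcSize / DefSize;
}

int main() {
  // concat_vectors_unmerge_to_bitcast: two s128 defs from two <2 x s64>
  // sources -> 1 def per source, so each source is just bitcast to s128.
  printf("%u\n", defsPerConcatSource(2, 128, 2, 128));
  // concat_vectors_unmerge_to_unmerge: four s64 defs from two <2 x s64>
  // sources -> 2 defs per source, so each source gets a 2x64-bit unmerge.
  printf("%u\n", defsPerConcatSource(4, 64, 2, 128));
  return 0;
}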