This needs more tests, but something along these lines is required to catch the last motivating case from https://bugs.llvm.org/show_bug.cgi?id=50971
# ----------------------------------------------------------------------
# _Z6float1Dv4_dS_
#   Itanium-mangled name; decodes to float1(<4 x double>, <4 x double>).
# Syntax: AT&T / GAS (op src, dst), compiler-generated x86-64 ELF output.
# In:     ymm0 = first vector, xmm1 = low 128 bits of the second vector
#         (presumably the SysV AMD64 vector argument registers -
#          TODO confirm against the C++ source, which is not shown here).
# Out:    ymm0 = first vector with its top 64 bits (double element 3)
#         replaced by the low 64 bits (double element 0) of the second.
# Clobb:  ymm1 (overwritten by the broadcast). No stack or memory access.
# ----------------------------------------------------------------------
.globl _Z6float1Dv4_dS_ # -- Begin function _Z6float1Dv4_dS_
# Align the entry point to 2^4 = 16 bytes, padding with 0x90 (one-byte NOP).
.p2align 4, 0x90
.type _Z6float1Dv4_dS_,@function
_Z6float1Dv4_dS_: # @_Z6float1Dv4_dS_
# Open the call-frame-information record for this function.
.cfi_startproc
# %bb.0:
# Replicate the low 64-bit element of xmm1 into all four 64-bit lanes of
# ymm1. The register-source form of vbroadcastsd is an AVX2 encoding.
vbroadcastsd %xmm1, %ymm1
# Immediate 192 = 0b11000000: only bits 6 and 7 are set, so 32-bit lanes
# 6-7 are taken from ymm1 and lanes 0-5 are kept from ymm0. Lanes 6-7
# together form the top 64-bit element, so just one of the four broadcast
# copies is actually consumed.
vblendps $192, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
# NOTE(review): no vzeroupper before the return - presumably because the
# full 256-bit ymm0 is the live return value; confirm against the source.
retq
# Local end label; .size records the symbol size as (end - start).
.Lfunc_end0:
.size _Z6float1Dv4_dS_, .Lfunc_end0-_Z6float1Dv4_dS_
# Close the call-frame-information record opened by .cfi_startproc.
.cfi_endproc
# -- End function
# Producer identification string embedded in the object file.
.ident "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 41e3ac398c3ae9dfba5a57d80c420c122c1ec700)"
# Empty, flag-less .note.GNU-stack section: the GNU toolchain convention
# for declaring that this object does not need an executable stack.
.section ".note.GNU-stack","",@progbits
lowerShuffleAsDecomposedShuffleMerge already does some of this - could we extend that instead?