Changeset View
Standalone View
test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines | |||||
define <4 x i64> @test_mm256_adds_epi8(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_adds_epi8(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_adds_epi8: | ; CHECK-LABEL: test_mm256_adds_epi8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
%res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %arg0, <32 x i8> %arg1) | %1 = sext <32 x i8> %arg0 to <32 x i16> | ||||
%bc = bitcast <32 x i8> %res to <4 x i64> | %2 = sext <32 x i8> %arg1 to <32 x i16> | ||||
%3 = add nsw <32 x i16> %1, %2 | |||||
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> | |||||
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> | |||||
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> | |||||
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> | |||||
%8 = trunc <32 x i16> %7 to <32 x i8> | |||||
%bc = bitcast <32 x i8> %8 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone | |||||
define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_adds_epi16: | ; CHECK-LABEL: test_mm256_adds_epi16: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16> | %arg0 = bitcast <4 x i64> %a0 to <16 x i16> | ||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16> | %arg1 = bitcast <4 x i64> %a1 to <16 x i16> | ||||
%res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %arg0, <16 x i16> %arg1) | %1 = sext <16 x i16> %arg0 to <16 x i32> | ||||
%bc = bitcast <16 x i16> %res to <4 x i64> | %2 = sext <16 x i16> %arg1 to <16 x i32> | ||||
%3 = add nsw <16 x i32> %1, %2 | |||||
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> | |||||
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> | |||||
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> | |||||
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> | |||||
%8 = trunc <16 x i32> %7 to <16 x i16> | |||||
%bc = bitcast <16 x i16> %8 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone | |||||
define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_adds_epu8: | ; CHECK-LABEL: test_mm256_adds_epu8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
%res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %arg0, <32 x i8> %arg1) | %1 = add <32 x i8> %arg0, %arg1 | ||||
%bc = bitcast <32 x i8> %res to <4 x i64> | %2 = icmp ugt <32 x i8> %arg0, %1 | ||||
%3 = select <32 x i1> %2, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %1 | |||||
%bc = bitcast <32 x i8> %3 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone | |||||
define <4 x i64> @test_mm256_adds_epu16(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_adds_epu16(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_adds_epu16: | ; CHECK-LABEL: test_mm256_adds_epu16: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16> | %arg0 = bitcast <4 x i64> %a0 to <16 x i16> | ||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16> | %arg1 = bitcast <4 x i64> %a1 to <16 x i16> | ||||
%res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %arg0, <16 x i16> %arg1) | %1 = add <16 x i16> %arg0, %arg1 | ||||
%bc = bitcast <16 x i16> %res to <4 x i64> | %2 = icmp ugt <16 x i16> %arg0, %1 | ||||
%3 = select <16 x i1> %2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %1 | |||||
%bc = bitcast <16 x i16> %3 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone | |||||
define <4 x i64> @test_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_alignr_epi8: | ; CHECK-LABEL: test_mm256_alignr_epi8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17] | ; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17] | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
▲ Show 20 Lines • Show All 2,371 Lines • ▼ Show 20 Lines | |||||
define <4 x i64> @test_mm256_subs_epi8(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_subs_epi8(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_subs_epi8: | ; CHECK-LABEL: test_mm256_subs_epi8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
%res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %arg0, <32 x i8> %arg1) | %1 = sext <32 x i8> %arg0 to <32 x i16> | ||||
%bc = bitcast <32 x i8> %res to <4 x i64> | %2 = sext <32 x i8> %arg1 to <32 x i16> | ||||
%3 = sub nsw <32 x i16> %1, %2 | |||||
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> | |||||
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> | |||||
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> | |||||
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> | |||||
%8 = trunc <32 x i16> %7 to <32 x i8> | |||||
%bc = bitcast <32 x i8> %8 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone | |||||
define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_subs_epi16: | ; CHECK-LABEL: test_mm256_subs_epi16: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16> | %arg0 = bitcast <4 x i64> %a0 to <16 x i16> | ||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16> | %arg1 = bitcast <4 x i64> %a1 to <16 x i16> | ||||
%res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %arg0, <16 x i16> %arg1) | %1 = sext <16 x i16> %arg0 to <16 x i32> | ||||
%bc = bitcast <16 x i16> %res to <4 x i64> | %2 = sext <16 x i16> %arg1 to <16 x i32> | ||||
%3 = sub nsw <16 x i32> %1, %2 | |||||
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> | |||||
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> | |||||
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> | |||||
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> | |||||
%8 = trunc <16 x i32> %7 to <16 x i16> | |||||
%bc = bitcast <16 x i16> %8 to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone | |||||
define <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_subs_epu8: | ; CHECK-LABEL: test_mm256_subs_epu8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: vpsubb %ymm1, %ymm0, %ymm0 | |||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
sroland: Err, so it doesn't actually recognize the pattern? | |||||
tkrupa: Please look at my comment from Thu, May 10, 10:27 AM. | |||||
sroland: Ah, sorry, I missed that.
I'm not quite sure if that means the pattern can never be folded into a… | |||||
tkrupa: It only fails to fold with fast-isel (one of the non-standard instruction selection options chosen with a compiler flag); it is still always combined with the default SelectionDAGISel. | |||||
sroland: OK, we're not using that, but I believe at some point we considered it (compile times are really important with JIT, and that's my other problem with AutoUpgrade anyway; this is not going to help there, but it might not be significant, and I'm thinking that's just the price you have to pay for the compiler getting smarter). | |||||
tkrupa: Can the fast-isel case be left like that, then? If not, I could try to do the MachineCombiner folding (we don't currently have any combining like that for x86, from what I've seen) or abandon doing the AutoUpgrade for this particular intrinsic. | |||||
sroland: From my perspective for Mesa, yes, it should be OK. | |||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
%res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %arg0, <32 x i8> %arg1) | %cmp = icmp ugt <32 x i8> %arg0, %arg1 | ||||
%bc = bitcast <32 x i8> %res to <4 x i64> | %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1 | ||||
%sub = sub <32 x i8> %sel, %arg1 | |||||
%bc = bitcast <32 x i8> %sub to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone | |||||
define <4 x i64> @test_mm256_subs_epu16(<4 x i64> %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_subs_epu16(<4 x i64> %a0, <4 x i64> %a1) { | ||||
; CHECK-LABEL: test_mm256_subs_epu16: | ; CHECK-LABEL: test_mm256_subs_epu16: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 | ; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 | ||||
; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 | |||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <16 x i16> | %arg0 = bitcast <4 x i64> %a0 to <16 x i16> | ||||
%arg1 = bitcast <4 x i64> %a1 to <16 x i16> | %arg1 = bitcast <4 x i64> %a1 to <16 x i16> | ||||
%res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %arg0, <16 x i16> %arg1) | %cmp = icmp ugt <16 x i16> %arg0, %arg1 | ||||
%bc = bitcast <16 x i16> %res to <4 x i64> | %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1 | ||||
%sub = sub <16 x i16> %sel, %arg1 | |||||
%bc = bitcast <16 x i16> %sub to <4 x i64> | |||||
ret <4 x i64> %bc | ret <4 x i64> %bc | ||||
} | } | ||||
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone | |||||
define <4 x i64> @test_mm256_unpackhi_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind { | define <4 x i64> @test_mm256_unpackhi_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind { | ||||
; CHECK-LABEL: test_mm256_unpackhi_epi8: | ; CHECK-LABEL: test_mm256_unpackhi_epi8: | ||||
; CHECK: # %bb.0: | ; CHECK: # %bb.0: | ||||
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] | ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] | ||||
; CHECK-NEXT: ret{{[l|q]}} | ; CHECK-NEXT: ret{{[l|q]}} | ||||
%arg0 = bitcast <4 x i64> %a0 to <32 x i8> | %arg0 = bitcast <4 x i64> %a0 to <32 x i8> | ||||
%arg1 = bitcast <4 x i64> %a1 to <32 x i8> | %arg1 = bitcast <4 x i64> %a1 to <32 x i8> | ||||
▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines |
Err, so it doesn't actually recognize the pattern?