diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c --- a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s #include diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -1,5 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s + +// There are a few cases where instead of accepting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. 
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -fexperimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -10480,20 +10485,24 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 + // CHECK-LABEL: @test_mm512_mask_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 + // CHECK-LABEL: @test_mm512_maskz_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -1,5 +1,8 @@ -// RUN: %clang_cc1 
-ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where instead of accepting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -4589,6 +4592,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } @@ -4597,6 +4602,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } @@ -4605,6 +4612,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 
x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } @@ -4613,6 +4622,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } @@ -4668,6 +4679,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epi32(__M,__A,__B); } @@ -4675,6 +4688,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epi32(__W,__M,__A,__B); } @@ -4682,6 +4697,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to 
<8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epi32(__M,__A,__B); } @@ -4689,6 +4706,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epi32(__W,__M,__A,__B); } @@ -4736,6 +4755,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epu32(__M,__A,__B); } @@ -4743,6 +4764,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epu32(__W,__M,__A,__B); } @@ -4750,6 +4773,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return 
_mm256_maskz_max_epu32(__M,__A,__B); } @@ -4757,6 +4782,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epu32(__W,__M,__A,__B); } @@ -4804,6 +4831,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epi32(__M,__A,__B); } @@ -4811,6 +4840,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epi32(__W,__M,__A,__B); } @@ -4818,6 +4849,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epi32(__M,__A,__B); } @@ -4825,6 +4858,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi32 // 
CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epi32(__W,__M,__A,__B); } @@ -4872,6 +4907,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epu32(__M,__A,__B); } @@ -4879,6 +4916,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epu32(__W,__M,__A,__B); } @@ -4886,6 +4925,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epu32(__M,__A,__B); } @@ -4893,6 +4934,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> 
[[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epu32(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -1,6 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where instead of accepting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. 
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -901,6 +905,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } @@ -910,6 +916,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } @@ -919,6 +927,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } @@ 
-928,6 +938,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } @@ -937,6 +949,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } @@ -946,6 +960,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } @@ -955,6 +971,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return 
_mm256_mask_abs_epi16(__W,__U,__A); } @@ -964,6 +982,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } @@ -1229,6 +1249,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epi8(__M,__A,__B); } @@ -1236,6 +1258,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epi8(__W,__M,__A,__B); } @@ -1243,6 +1267,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epi8(__M,__A,__B); } @@ 
-1250,6 +1276,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epi8(__W,__M,__A,__B); } @@ -1257,6 +1285,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epi16(__M,__A,__B); } @@ -1264,6 +1294,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epi16(__W,__M,__A,__B); } @@ -1271,6 +1303,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epi16(__M,__A,__B); } @@ -1278,6 +1312,8 @@ // CHECK-LABEL: 
@test_mm256_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epi16(__W,__M,__A,__B); } @@ -1285,6 +1321,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epu8(__M,__A,__B); } @@ -1292,6 +1330,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epu8(__W,__M,__A,__B); } @@ -1299,6 +1339,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epu8(__M,__A,__B); } @@ -1306,6 +1348,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt 
<32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epu8(__W,__M,__A,__B); } @@ -1313,6 +1357,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epu16(__M,__A,__B); } @@ -1320,6 +1366,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epu16(__W,__M,__A,__B); } @@ -1327,6 +1375,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epu16(__M,__A,__B); } @@ -1334,6 +1384,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] 
= select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epu16(__W,__M,__A,__B); } @@ -1341,6 +1393,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epi8(__M,__A,__B); } @@ -1348,6 +1402,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epi8(__W,__M,__A,__B); } @@ -1355,6 +1411,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epi8(__M,__A,__B); } @@ -1362,6 +1420,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + 
// CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epi8(__W,__M,__A,__B); } @@ -1369,6 +1429,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epi16(__M,__A,__B); } @@ -1376,6 +1438,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epi16(__W,__M,__A,__B); } @@ -1383,6 +1447,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epi16(__M,__A,__B); } @@ -1390,6 +1456,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] 
[[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epi16(__W,__M,__A,__B); } @@ -1397,6 +1465,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epu8(__M,__A,__B); } @@ -1404,6 +1474,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epu8(__W,__M,__A,__B); } @@ -1411,6 +1483,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epu8(__M,__A,__B); } @@ -1418,6 +1492,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = 
bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epu8(__W,__M,__A,__B); } @@ -1425,6 +1501,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epu16(__M,__A,__B); } @@ -1432,6 +1510,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epu16(__W,__M,__A,__B); } @@ -1439,6 +1519,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epu16(__M,__A,__B); } @@ -1446,6 +1528,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select 
<16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epu16(__W,__M,__A,__B); } diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM // This is initially assumed convergent, but can be deduced to not require it. @@ -117,7 +118,12 @@ // CHECK: [[for_body]]: // CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]] // CHECK-NOT: call spir_func void @nodupfun() -// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] + +// The new PM produces a slightly different IR for the loop from the legacy PM, +// but the test still checks that the loop is not unrolled. +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-NEWPM: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] +// CHECK-NEWPM: [[for_body_crit_edge]]: void test_not_unroll() { for (int i = 0; i < 10; i++)