diff --git a/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c b/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c --- a/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c +++ b/clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c @@ -62,43 +62,43 @@ return vld1_bf16_x2(ptr); } // CHECK-LABEL: test_vld1_bf16_x2 -// CHECK64: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x2.v4bf16.p0bf16(bfloat* %ptr) bfloat16x8x2_t test_vld1q_bf16_x2(bfloat16_t const *ptr) { return vld1q_bf16_x2(ptr); } // CHECK-LABEL: test_vld1q_bf16_x2 -// CHECK64: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x2.v8bf16.p0bf16(bfloat* %ptr) bfloat16x4x3_t test_vld1_bf16_x3(bfloat16_t const *ptr) { return vld1_bf16_x3(ptr); } // CHECK-LABEL: test_vld1_bf16_x3 -// CHECK64: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x3.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x3.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x3.v4bf16.p0bf16(bfloat* %ptr) bfloat16x8x3_t test_vld1q_bf16_x3(bfloat16_t const *ptr) { return vld1q_bf16_x3(ptr); } // CHECK-LABEL: test_vld1q_bf16_x3 -// CHECK64: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x3.v8bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x3.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x3.v8bf16.p0bf16(bfloat* %ptr) bfloat16x4x4_t test_vld1_bf16_x4(bfloat16_t const *ptr) { return vld1_bf16_x4(ptr); } // CHECK-LABEL: test_vld1_bf16_x4 -// CHECK64: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x4.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x4.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld1x4.v4bf16.p0bf16(bfloat* %ptr) bfloat16x8x4_t test_vld1q_bf16_x4(bfloat16_t const *ptr) { return vld1q_bf16_x4(ptr); } // CHECK-LABEL: test_vld1q_bf16_x4 -// CHECK64: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x4.v8bf16.p0bf16(bfloat* %ptr) -// 
CHECK32: %vld1xN = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0bf16(bfloat* %ptr) +// CHECK64: %vld1xN = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x4.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld1xN = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld1x4.v8bf16.p0bf16(bfloat* %ptr) bfloat16x8_t test_vld1q_dup_bf16(bfloat16_t const *ptr) { return vld1q_dup_bf16(ptr); @@ -118,139 +118,139 @@ } // CHECK-LABEL: test_vld2_bf16 // CHECK64: %0 = bitcast bfloat* %ptr to <4 x bfloat>* -// CHECK64-NEXT: %vld2 = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2.v4bf16.p0v4bf16(<4 x bfloat>* %0) +// CHECK64-NEXT: %vld2 = call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2.v4bf16.p0v4bf16(<4 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld2_v = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2.v4bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld2_v = call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x2_t test_vld2q_bf16(bfloat16_t const *ptr) { return vld2q_bf16(ptr); } // CHECK-LABEL: test_vld2q_bf16 // CHECK64: %0 = bitcast bfloat* %ptr to <8 x bfloat>* -// CHECK64-NEXT: %vld2 = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2.v8bf16.p0v8bf16(<8 x bfloat>* %0) +// CHECK64-NEXT: %vld2 = call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2.v8bf16.p0v8bf16(<8 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld2q_v = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2.v8bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld2q_v = call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2.v8bf16.p0i8(i8* %0, i32 2) bfloat16x4x2_t test_vld2_lane_bf16(bfloat16_t const *ptr, bfloat16x4x2_t src) { return vld2_lane_bf16(ptr, src, 1); } // CHECK-LABEL: test_vld2_lane_bf16 -// CHECK64: %vld2_lane = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, i64 1, i8* %0) -// CHECK32: %vld2_lane_v = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2lane.v4bf16.p0i8(i8* %2, <4 x bfloat> %0, <4 x bfloat> %1, i32 1, i32 2) +// CHECK64: %vld2_lane = call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, i64 1, i8* %0) +// CHECK32: %vld2_lane_v = call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2lane.v4bf16.p0i8(i8* %2, <4 x bfloat> %0, <4 x bfloat> %1, i32 1, i32 2) bfloat16x8x2_t test_vld2q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x2_t src) { return vld2q_lane_bf16(ptr, src, 7); } // CHECK-LABEL: test_vld2q_lane_bf16 -// CHECK64: %vld2_lane = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, i64 7, i8* %0) -// CHECK32: %vld2q_lane_v = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2lane.v8bf16.p0i8(i8* %2, <8 x bfloat> %0, <8 x bfloat> %1, i32 7, i32 2) +// CHECK64: %vld2_lane = call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, i64 7, i8* %0) +// CHECK32: %vld2q_lane_v = call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2lane.v8bf16.p0i8(i8* %2, <8 x bfloat> %0, <8 x bfloat> %1, i32 7, i32 2) 
bfloat16x4x3_t test_vld3_bf16(bfloat16_t const *ptr) { return vld3_bf16(ptr); } // CHECK-LABEL: test_vld3_bf16 -// CHECK64: %vld3 = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3.v4bf16.p0v4bf16(<4 x bfloat>* %0) +// CHECK64: %vld3 = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3.v4bf16.p0v4bf16(<4 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld3_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3.v4bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld3_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x3_t test_vld3q_bf16(bfloat16_t const *ptr) { return vld3q_bf16(ptr); } // CHECK-LABEL: test_vld3q_bf16 -// CHECK64: %vld3 = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3.v8bf16.p0v8bf16(<8 x bfloat>* %0) +// CHECK64: %vld3 = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3.v8bf16.p0v8bf16(<8 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld3q_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3.v8bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld3q_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3.v8bf16.p0i8(i8* %0, i32 2) bfloat16x4x3_t test_vld3_lane_bf16(bfloat16_t const *ptr, bfloat16x4x3_t src) { return vld3_lane_bf16(ptr, src, 1); } // CHECK-LABEL: test_vld3_lane_bf16 -// CHECK64: %vld3_lane = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, <4 x bfloat> %src.coerce.fca.2.extract, i64 1, i8* %0) +// CHECK64: %vld3_lane = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, <4 x bfloat> %src.coerce.fca.2.extract, i64 1, i8* %0) // CHECK32: %3 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld3_lane_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3lane.v4bf16.p0i8(i8* %3, <4 x bfloat> %0, <4 x bfloat> %1, <4 x bfloat> %2, i32 1, i32 2) +// CHECK32-NEXT: %vld3_lane_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3lane.v4bf16.p0i8(i8* %3, <4 x bfloat> %0, <4 x bfloat> %1, <4 x bfloat> %2, i32 1, i32 2) bfloat16x8x3_t test_vld3q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x3_t src) { return vld3q_lane_bf16(ptr, src, 7); // return vld3q_lane_bf16(ptr, src, 8); } // CHECK-LABEL: test_vld3q_lane_bf16 -// CHECK64: %vld3_lane = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, <8 x bfloat> %src.coerce.fca.2.extract, i64 7, i8* %0) +// CHECK64: %vld3_lane = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, <8 x bfloat> %src.coerce.fca.2.extract, i64 7, i8* %0) // CHECK32: %3 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld3q_lane_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3lane.v8bf16.p0i8(i8* %3, <8 x bfloat> %0, <8 x bfloat> %1, <8 x bfloat> %2, i32 7, i32 2) +// CHECK32-NEXT: %vld3q_lane_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3lane.v8bf16.p0i8(i8* %3, <8 x bfloat> %0, <8 x bfloat> %1, <8 x 
bfloat> %2, i32 7, i32 2) bfloat16x4x4_t test_vld4_bf16(bfloat16_t const *ptr) { return vld4_bf16(ptr); } // CHECK-LABEL: test_vld4_bf16 -// CHECK64: %vld4 = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4.v4bf16.p0v4bf16(<4 x bfloat>* %0) +// CHECK64: %vld4 = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4.v4bf16.p0v4bf16(<4 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld4_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4.v4bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld4_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x4_t test_vld4q_bf16(bfloat16_t const *ptr) { return vld4q_bf16(ptr); } // CHECK-LABEL: test_vld4q_bf16 -// CHECK64: %vld4 = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4.v8bf16.p0v8bf16(<8 x bfloat>* %0) +// CHECK64: %vld4 = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4.v8bf16.p0v8bf16(<8 x bfloat>* %0) // CHECK32: %0 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld4q_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4.v8bf16.p0i8(i8* %0, i32 2) +// CHECK32-NEXT: %vld4q_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4.v8bf16.p0i8(i8* %0, i32 2) bfloat16x4x4_t test_vld4_lane_bf16(bfloat16_t const *ptr, bfloat16x4x4_t src) { return vld4_lane_bf16(ptr, src, 1); } // CHECK-LABEL: test_vld4_lane_bf16 -// CHECK64: %vld4_lane = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, <4 x bfloat> %src.coerce.fca.2.extract, <4 x bfloat> %src.coerce.fca.3.extract, i64 1, i8* %0) +// CHECK64: %vld4_lane = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4lane.v4bf16.p0i8(<4 x bfloat> %src.coerce.fca.0.extract, <4 x bfloat> %src.coerce.fca.1.extract, <4 x bfloat> %src.coerce.fca.2.extract, <4 x bfloat> %src.coerce.fca.3.extract, i64 1, i8* %0) // CHECK32: %4 = bitcast bfloat* %ptr to i8* -// CHECK32-NEXT: %vld4_lane_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4lane.v4bf16.p0i8(i8* %4, <4 x bfloat> %0, <4 x bfloat> %1, <4 x bfloat> %2, <4 x bfloat> %3, i32 1, i32 2) +// CHECK32-NEXT: %vld4_lane_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4lane.v4bf16.p0i8(i8* %4, <4 x bfloat> %0, <4 x bfloat> %1, <4 x bfloat> %2, <4 x bfloat> %3, i32 1, i32 2) bfloat16x8x4_t test_vld4q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x4_t src) { return vld4q_lane_bf16(ptr, src, 7); } // CHECK-LABEL: test_vld4q_lane_bf16 -// CHECK64: %vld4_lane = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, <8 x bfloat> %src.coerce.fca.2.extract, <8 x bfloat> %src.coerce.fca.3.extract, i64 7, i8* %0) +// CHECK64: %vld4_lane = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4lane.v8bf16.p0i8(<8 x bfloat> %src.coerce.fca.0.extract, <8 x bfloat> %src.coerce.fca.1.extract, <8 x bfloat> %src.coerce.fca.2.extract, <8 x bfloat> %src.coerce.fca.3.extract, i64 7, i8* %0) // CHECK32: %4 = bitcast bfloat* 
%ptr to i8* -// CHECK32-NEXT: %vld4q_lane_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4lane.v8bf16.p0i8(i8* %4, <8 x bfloat> %0, <8 x bfloat> %1, <8 x bfloat> %2, <8 x bfloat> %3, i32 7, i32 2) +// CHECK32-NEXT: %vld4q_lane_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4lane.v8bf16.p0i8(i8* %4, <8 x bfloat> %0, <8 x bfloat> %1, <8 x bfloat> %2, <8 x bfloat> %3, i32 7, i32 2) bfloat16x4x2_t test_vld2_dup_bf16(bfloat16_t const *ptr) { return vld2_dup_bf16(ptr); } // CHECK-LABEL: test_vld2_dup_bf16 -// CHECK64: %vld2 = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2r.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld2_dup_v = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2dup.v4bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld2 = call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld2r.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld2_dup_v = call { <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld2dup.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x2_t test_vld2q_dup_bf16(bfloat16_t const *ptr) { return vld2q_dup_bf16(ptr); } // CHECK-LABEL: test_vld2q_dup_bf16 -// CHECK64: %vld2 = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2r.v8bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld2q_dup_v = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2dup.v8bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld2 = call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld2r.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld2q_dup_v = call { <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld2dup.v8bf16.p0i8(i8* %0, i32 2) bfloat16x4x3_t test_vld3_dup_bf16(bfloat16_t const *ptr) { return vld3_dup_bf16(ptr); } // CHECK-LABEL: test_vld3_dup_bf16 -// CHECK64: %vld3 = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3r.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld3_dup_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3dup.v4bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld3 = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld3r.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld3_dup_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld3dup.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x3_t test_vld3q_dup_bf16(bfloat16_t const *ptr) { return vld3q_dup_bf16(ptr); } // CHECK-LABEL: test_vld3q_dup_bf16 -// CHECK64: %vld3 = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3r.v8bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld3q_dup_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3dup.v8bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld3 = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld3r.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld3q_dup_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld3dup.v8bf16.p0i8(i8* %0, i32 2) bfloat16x4x4_t test_vld4_dup_bf16(bfloat16_t const *ptr) { return vld4_dup_bf16(ptr); } // CHECK-LABEL: test_vld4_dup_bf16 -// CHECK64: %vld4 = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4r.v4bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld4_dup_v = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.arm.neon.vld4dup.v4bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld4 = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld4r.v4bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld4_dup_v = call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } 
@llvm.arm.neon.vld4dup.v4bf16.p0i8(i8* %0, i32 2) bfloat16x8x4_t test_vld4q_dup_bf16(bfloat16_t const *ptr) { return vld4q_dup_bf16(ptr); } // CHECK-LABEL: test_vld4q_dup_bf16 -// CHECK64: %vld4 = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4r.v8bf16.p0bf16(bfloat* %ptr) -// CHECK32: %vld4q_dup_v = tail call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4dup.v8bf16.p0i8(i8* %0, i32 2) +// CHECK64: %vld4 = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld4r.v8bf16.p0bf16(bfloat* %ptr) +// CHECK32: %vld4q_dup_v = call { <8 x bfloat>, <8 x bfloat>, <8 x bfloat>, <8 x bfloat> } @llvm.arm.neon.vld4dup.v8bf16.p0i8(i8* %0, i32 2) void test_vst1_bf16(bfloat16_t *ptr, bfloat16x4_t val) { vst1_bf16(ptr, val); diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c @@ -61,7 +61,7 @@ // CHECK-LABEL: @test_vmaxaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -77,7 +77,7 @@ // CHECK-LABEL: @test_vmaxaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmaxa.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -93,7 +93,7 @@ // CHECK-LABEL: @test_vmaxaq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmaxa.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -6,8 +6,8 @@ // CHECK-LABEL: @test_vmaxnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -22,8 +22,8 @@ // CHECK-LABEL: @test_vmaxnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -39,7 +39,7 @@ // CHECK-LABEL: @test_vmaxnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -55,7 +55,7 @@ // CHECK-LABEL: @test_vmaxnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -35,7 +35,7 @@ // CHECK-LABEL: @test_vmaxnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -51,7 +51,7 @@ // CHECK-LABEL: @test_vmaxnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -67,7 +67,7 @@ // CHECK-LABEL: @test_vmaxnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -83,7 +83,7 @@ // CHECK-LABEL: @test_vmaxnmq_x_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x 
i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c @@ -52,7 +52,7 @@ // CHECK-LABEL: @test_vmaxq_m_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -68,7 +68,7 @@ // CHECK-LABEL: @test_vmaxq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -84,7 +84,7 @@ // CHECK-LABEL: @test_vmaxq_m_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -100,7 +100,7 @@ // CHECK-LABEL: @test_vmaxq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -116,7 +116,7 @@ // CHECK-LABEL: @test_vmaxq_x_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -132,7 +132,7 @@ // CHECK-LABEL: @test_vmaxq_x_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c @@ -61,7 +61,7 @@ // CHECK-LABEL: @test_vminaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmina.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -77,7 +77,7 @@ // CHECK-LABEL: @test_vminaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmina.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -93,7 +93,7 @@ // CHECK-LABEL: @test_vminaq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmina.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -6,8 +6,8 @@ // CHECK-LABEL: @test_vminnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -22,8 +22,8 @@ // CHECK-LABEL: @test_vminnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -39,7 +39,7 @@ // CHECK-LABEL: @test_vminnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: 
[[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -55,7 +55,7 @@ // CHECK-LABEL: @test_vminnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -35,7 +35,7 @@ // CHECK-LABEL: @test_vminnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -51,7 +51,7 @@ // CHECK-LABEL: @test_vminnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // @@ -67,7 +67,7 @@ // CHECK-LABEL: @test_vminnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // @@ -83,7 +83,7 @@ // CHECK-LABEL: @test_vminnmq_x_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c @@ -52,7 +52,7 @@ // CHECK-LABEL: @test_vminq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
<16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -68,7 +68,7 @@ // CHECK-LABEL: @test_vminq_m_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -84,7 +84,7 @@ // CHECK-LABEL: @test_vminq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // @@ -100,7 +100,7 @@ // CHECK-LABEL: @test_vminq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // @@ -116,7 +116,7 @@ // CHECK-LABEL: @test_vminq_x_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // @@ -132,7 +132,7 @@ // CHECK-LABEL: @test_vminq_x_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/clang/test/CodeGen/arm64-mte.c b/clang/test/CodeGen/arm64-mte.c --- a/clang/test/CodeGen/arm64-mte.c +++ b/clang/test/CodeGen/arm64-mte.c @@ -47,7 +47,7 @@ unsigned exclude_tag(int *a, unsigned m) { // CHECK: [[T0:%[0-9]+]] = zext i32 %m to i64 // CHECK: [[T1:%[0-9]+]] = bitcast i32* %a to i8* -// CHECK: [[T2:%[0-9]+]] = tail call i64 @llvm.aarch64.gmi(i8* [[T1]], i64 [[T0]]) +// CHECK: [[T2:%[0-9]+]] = call i64 @llvm.aarch64.gmi(i8* [[T1]], i64 [[T0]]) // CHECK: trunc i64 [[T2]] to i32 return 
__arm_mte_exclude_tag(a, m); } @@ -103,7 +103,7 @@ // Check arithmetic promotion on return type // CHECK-LABEL: define i32 @subtract_pointers4 int subtract_pointers4(void* a, void *b) { -// CHECK: [[T0:%[0-9]+]] = tail call i64 @llvm.aarch64.subp(i8* %a, i8* %b) +// CHECK: [[T0:%[0-9]+]] = call i64 @llvm.aarch64.subp(i8* %a, i8* %b) // CHECK-NEXT: %cmp = icmp slt i64 [[T0]], 1 // CHECK-NEXT: = zext i1 %cmp to i32 return __arm_mte_ptrdiff(a,b) <= 0; diff --git a/clang/test/CodeGen/builtins-multiprecision.c b/clang/test/CodeGen/builtins-multiprecision.c --- a/clang/test/CodeGen/builtins-multiprecision.c +++ b/clang/test/CodeGen/builtins-multiprecision.c @@ -5,10 +5,10 @@ unsigned char test_addcb(unsigned char x, unsigned char y, unsigned char carryin, unsigned char *z) { // CHECK: @test_addcb - // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) + // CHECK: %{{.+}} = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %{{.+}}, i8 %carryin) + // CHECK: %{{.+}} = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %{{.+}}, i8 %carryin) // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -24,10 +24,10 @@ unsigned short test_addcs(unsigned short x, unsigned short y, unsigned short carryin, unsigned short *z) { // CHECK: @test_addcs - // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) + // CHECK: %{{.+}} = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %x, i16 %y) // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %{{.+}}, i16 %carryin) + // CHECK: %{{.+}} = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %{{.+}}, i16 %carryin) // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -42,10 +42,10 @@ unsigned test_addc(unsigned x, unsigned y, unsigned carryin, unsigned *z) { // CHECK: @test_addc - // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) + // CHECK: %{{.+}} = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %carryin) + // CHECK: %{{.+}} = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %carryin) // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -61,10 +61,10 @@ unsigned long carryin, unsigned long *z) { // long is i32 on i686, i64 on x86_64. 
// CHECK: @test_addcl([[UL:i32|i64]] %x - // CHECK: %{{.+}} = {{.*}} call { [[UL]], i1 } @llvm.uadd.with.overflow.[[UL]]([[UL]] %x, [[UL]] %y) + // CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.uadd.with.overflow.[[UL]]([[UL]] %x, [[UL]] %y) // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { [[UL]], i1 } @llvm.uadd.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %carryin) + // CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.uadd.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %carryin) // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -80,10 +80,10 @@ unsigned long long carryin, unsigned long long *z) { // CHECK: @test_addcll - // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) + // CHECK: %{{.+}} = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x, i64 %y) // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin) + // CHECK: %{{.+}} = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %{{.+}}, i64 %carryin) // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -98,10 +98,10 @@ unsigned char test_subcb(unsigned char x, unsigned char y, unsigned char carryin, unsigned char *z) { // CHECK: @test_subcb - // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + // CHECK: %{{.+}} = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %{{.+}}, i8 %carryin) + // CHECK: %{{.+}} = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %{{.+}}, i8 %carryin) // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i8, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -117,10 +117,10 @@ unsigned short test_subcs(unsigned short x, unsigned short y, unsigned short carryin, unsigned short *z) { // CHECK: @test_subcs - // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %x, i16 %y) + // CHECK: %{{.+}} = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %x, i16 %y) // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %{{.+}}, i16 %carryin) + // CHECK: %{{.+}} = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %{{.+}}, i16 %carryin) // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i16, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -135,10 +135,10 @@ unsigned test_subc(unsigned x, unsigned y, unsigned carryin, unsigned *z) { // CHECK: @test_subc - // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + // CHECK: %{{.+}} = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y) // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %{{.+}}, i32 %carryin) + // CHECK: %{{.+}} 
= call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %{{.+}}, i32 %carryin) // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i32, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -153,10 +153,10 @@ unsigned long test_subcl(unsigned long x, unsigned long y, unsigned long carryin, unsigned long *z) { // CHECK: @test_subcl([[UL:i32|i64]] %x - // CHECK: %{{.+}} = {{.*}} call { [[UL]], i1 } @llvm.usub.with.overflow.[[UL]]([[UL]] %x, [[UL]] %y) + // CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.usub.with.overflow.[[UL]]([[UL]] %x, [[UL]] %y) // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { [[UL]], i1 } @llvm.usub.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %carryin) + // CHECK: %{{.+}} = call { [[UL]], i1 } @llvm.usub.with.overflow.[[UL]]([[UL]] %{{.+}}, [[UL]] %carryin) // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { [[UL]], i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} @@ -172,10 +172,10 @@ unsigned long long carryin, unsigned long long *z) { // CHECK: @test_subcll - // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y) + // CHECK: %{{.+}} = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y) // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0 - // CHECK: %{{.+}} = {{.*}} call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %{{.+}}, i64 %carryin) + // CHECK: %{{.+}} = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %{{.+}}, i64 %carryin) // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 1 // CHECK: %{{.+}} = extractvalue { i64, i1 } %{{.+}}, 0 // CHECK: %{{.+}} = or i1 %{{.+}}, %{{.+}} diff --git a/clang/test/CodeGen/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/builtins-systemz-zvector-constrained.c --- a/clang/test/CodeGen/builtins-systemz-zvector-constrained.c +++ b/clang/test/CodeGen/builtins-systemz-zvector-constrained.c @@ -205,7 +205,7 @@ // CHECK-ASM: vflpdb vd = vec_nabs(vd); - // CHECK: [[ABS:%[^ ]+]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) + // CHECK: [[ABS:%[^ ]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) // CHECK-NEXT: fneg <2 x double> [[ABS]] // CHECK-ASM: vflndb @@ -225,7 +225,7 @@ // CHECK: call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> [[VAL]], metadata !{{.*}}) // (emulated) vec_st2f(vd, ptrf); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // CHECK: store <2 x float> [[VAL]], <2 x float>* %{{.*}} // (emulated) @@ -236,19 +236,19 @@ // CHECK: call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) // (emulated) vd = vec_ctd(vsl, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call 
<2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) // (emulated) vd = vec_ctd(vsl, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) // (emulated) @@ -259,19 +259,19 @@ // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> , metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) diff --git a/clang/test/CodeGen/builtins-systemz-zvector.c b/clang/test/CodeGen/builtins-systemz-zvector.c --- a/clang/test/CodeGen/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/builtins-systemz-zvector.c @@ -4441,7 +4441,7 @@ // CHECK-ASM: vflpdb vd = vec_nabs(vd); - // CHECK: [[ABS:%[^ ]+]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) + // CHECK: 
[[ABS:%[^ ]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) // CHECK-NEXT: fneg <2 x double> [[ABS]] // CHECK-ASM: vflndb diff --git a/clang/test/CodeGen/builtins-systemz-zvector2-constrained.c b/clang/test/CodeGen/builtins-systemz-zvector2-constrained.c --- a/clang/test/CodeGen/builtins-systemz-zvector2-constrained.c +++ b/clang/test/CodeGen/builtins-systemz-zvector2-constrained.c @@ -382,11 +382,11 @@ // CHECK-ASM: vflpdb vf = vec_nabs(vf); - // CHECK: [[ABS:%[^ ]+]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}}) + // CHECK: [[ABS:%[^ ]+]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}}) // CHECK-NEXT: fneg <4 x float> [[ABS]] // CHECK-ASM: vflnsb vd = vec_nabs(vd); - // CHECK: [[ABS:%[^ ]+]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) + // CHECK: [[ABS:%[^ ]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) // CHECK-NEXT: fneg <2 x double> [[ABS]] // CHECK-ASM: vflndb @@ -421,22 +421,22 @@ // CHECK-ASM: vfmsdb vf = vec_nmadd(vf, vf, vf); - // CHECK: [[RES:%[^ ]+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) // CHECK: fneg <4 x float> [[RES]] // CHECK-ASM: vfnmasb vd = vec_nmadd(vd, vd, vd); - // CHECK: [[RES:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) // CHECK: fneg <2 x double> [[RES]] // CHECK-ASM: vfnmadb vf = vec_nmsub(vf, vf, vf); // CHECK: [[NEG:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK: [[RES:%[^ ]+]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}}) // CHECK: fneg <4 x float> [[RES]] // CHECK-ASM: vfnmssb vd = vec_nmsub(vd, vd, vd); // CHECK: [[NEG:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK: [[RES:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], metadata !{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], metadata !{{.*}}) // CHECK: fneg <2 x double> [[RES]] // CHECK-ASM: vfnmsdb diff --git a/clang/test/CodeGen/builtins-systemz-zvector2.c b/clang/test/CodeGen/builtins-systemz-zvector2.c --- a/clang/test/CodeGen/builtins-systemz-zvector2.c +++ b/clang/test/CodeGen/builtins-systemz-zvector2.c @@ -685,11 +685,11 @@ // CHECK-ASM: vflpdb vf = vec_nabs(vf); - // CHECK: [[ABS:%[^ ]+]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}}) + // CHECK: [[ABS:%[^ ]+]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}}) // CHECK-NEXT: fneg <4 x float> [[ABS]] // CHECK-ASM: vflnsb vd = vec_nabs(vd); - // CHECK: [[ABS:%[^ ]+]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) + // CHECK: [[ABS:%[^ ]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}}) // CHECK-NEXT: fneg <2 x 
double> [[ABS]] // CHECK-ASM: vflndb @@ -724,22 +724,22 @@ // CHECK-ASM: vfmsdb vf = vec_nmadd(vf, vf, vf); - // CHECK: [[RES:%[^ ]+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: fneg <4 x float> [[RES]] // CHECK-ASM: vfnmasb vd = vec_nmadd(vd, vd, vd); - // CHECK: [[RES:%[^ ]+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK: fneg <2 x double> [[RES]] // CHECK-ASM: vfnmadb vf = vec_nmsub(vf, vf, vf); // CHECK: [[NEG:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK: [[RES:%[^ ]+]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) + // CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) // CHECK: fneg <4 x float> [[RES]] // CHECK-ASM: vfnmssb vd = vec_nmsub(vd, vd, vd); // CHECK: [[NEG:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK: [[RES:%[^ ]+]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) + // CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) // CHECK: fneg <2 x double> [[RES]] // CHECK-ASM: vfnmsdb diff --git a/clang/test/CodeGen/cmse-clear-fp16.c b/clang/test/CodeGen/cmse-clear-fp16.c --- a/clang/test/CodeGen/cmse-clear-fp16.c +++ b/clang/test/CodeGen/cmse-clear-fp16.c @@ -21,7 +21,7 @@ // CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535 // CHECK-NOPT-SOFT: ret i32 %[[V1]] -// CHECK-OPT-SOFT: %[[V0:.*]] = tail call {{.*}} @g0 +// CHECK-OPT-SOFT: %[[V0:.*]] = call {{.*}} @g0 // CHECK-OPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535 // CHECK-OPT-SOFT: ret i32 %[[V1]] diff --git a/clang/test/CodeGen/inline-asm-x86-flag-output.c b/clang/test/CodeGen/inline-asm-x86-flag-output.c --- a/clang/test/CodeGen/inline-asm-x86-flag-output.c +++ b/clang/test/CodeGen/inline-asm-x86-flag-output.c @@ -2,7 +2,7 @@ int test_cca(long nr, volatile long *addr) { //CHECK-LABEL: @test_cca - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cca"(x), "=m"(*(volatile long *)(addr)) @@ -15,7 +15,7 @@ int test_ccae(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccae - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccae"(x), "=m"(*(volatile long *)(addr)) @@ -28,7 +28,7 @@ int test_ccb(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccb - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccb"(x), "=m"(*(volatile long *)(addr)) @@ -41,7 +41,7 @@ int test_ccbe(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccbe - //CHECK: tail call i32 asm "cmp $2,$1", 
"={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: call i32 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccbe"(x), "=m"(*(volatile long *)(addr)) @@ -54,7 +54,7 @@ int test_ccc(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccc - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccc"(x), "=m"(*(volatile long *)(addr)) @@ -67,7 +67,7 @@ int test_cce(long nr, volatile long *addr) { //CHECK-LABEL: @test_cce - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cce"(x), "=m"(*(volatile long *)(addr)) @@ -80,7 +80,7 @@ int test_ccz(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccz - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccz"(x), "=m"(*(volatile long *)(addr)) @@ -93,7 +93,7 @@ int test_ccg(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccg - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccg"(x), "=m"(*(volatile long *)(addr)) @@ -106,7 +106,7 @@ int test_ccge(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccge - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccge"(x), "=m"(*(volatile long *)(addr)) @@ -119,7 +119,7 @@ int test_ccl(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccl - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccl"(x), "=m"(*(volatile long *)(addr)) @@ -132,7 +132,7 @@ int test_ccle(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccle - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccle"(x), "=m"(*(volatile long *)(addr)) @@ -145,7 +145,7 @@ int test_ccna(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccna - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccna"(x), "=m"(*(volatile long *)(addr)) @@ -158,7 +158,7 @@ int test_ccnae(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnae - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 
asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnae"(x), "=m"(*(volatile long *)(addr)) @@ -171,7 +171,7 @@ int test_ccnb(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnb - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnb"(x), "=m"(*(volatile long *)(addr)) @@ -184,7 +184,7 @@ int test_ccnbe(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnbe - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnbe"(x), "=m"(*(volatile long *)(addr)) @@ -197,7 +197,7 @@ int test_ccnc(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnc - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnc"(x), "=m"(*(volatile long *)(addr)) @@ -210,7 +210,7 @@ int test_ccne(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccne - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccne"(x), "=m"(*(volatile long *)(addr)) @@ -223,7 +223,7 @@ int test_ccnz(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnz - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnz"(x), "=m"(*(volatile long *)(addr)) @@ -236,7 +236,7 @@ int test_ccng(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccng - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccng"(x), "=m"(*(volatile long *)(addr)) @@ -249,7 +249,7 @@ int test_ccnge(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnge - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnge"(x), "=m"(*(volatile long *)(addr)) @@ -262,7 +262,7 @@ int test_ccnl(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnl - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnl"(x), "=m"(*(volatile long *)(addr)) @@ -275,7 +275,7 @@ int test_ccnle(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnle - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", 
"={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnle"(x), "=m"(*(volatile long *)(addr)) @@ -288,7 +288,7 @@ int test_ccno(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccno - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccno"(x), "=m"(*(volatile long *)(addr)) @@ -301,7 +301,7 @@ int test_ccnp(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccnp - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccnp"(x), "=m"(*(volatile long *)(addr)) @@ -314,7 +314,7 @@ int test_ccns(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccns - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccns"(x), "=m"(*(volatile long *)(addr)) @@ -327,7 +327,7 @@ int test_cco(long nr, volatile long *addr) { //CHECK-LABEL: @test_cco - //CHECK: = tail call i32 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@cco"(x), "=m"(*(volatile long *)(addr)) @@ -340,7 +340,7 @@ int test_ccp(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccp - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccp"(x), "=m"(*(volatile long *)(addr)) @@ -353,7 +353,7 @@ int test_ccs(long nr, volatile long *addr) { //CHECK-LABEL: @test_ccs - //CHECK: = tail call i32 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) + //CHECK: = call i32 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) int x; asm("cmp %2,%1" : "=@ccs"(x), "=m"(*(volatile long *)(addr)) @@ -366,7 +366,7 @@ _Bool check_no_clobber_conflicts() { //CHECK-LABEL: @check_no_clobber_conflicts - //CHECK: = tail call i8 asm "", "={@cce},~{cx},~{dirflag},~{fpsr},~{flags}"() + //CHECK: = call i8 asm "", "={@cce},~{cx},~{dirflag},~{fpsr},~{flags}"() _Bool b; asm("" : "=@cce"(b) diff --git a/clang/test/CodeGen/ms-intrinsics-other.c b/clang/test/CodeGen/ms-intrinsics-other.c --- a/clang/test/CodeGen/ms-intrinsics-other.c +++ b/clang/test/CodeGen/ms-intrinsics-other.c @@ -31,7 +31,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[INDEX:%[0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %Mask, i1 true) +// CHECK: [[INDEX:%[0-9]+]] = call i32 @llvm.cttz.i32(i32 %Mask, i1 true) // CHECK: store i32 [[INDEX]], i32* %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -45,7 +45,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: 
[[REVINDEX:%[0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %Mask, i1 true) +// CHECK: [[REVINDEX:%[0-9]+]] = call i32 @llvm.ctlz.i32(i32 %Mask, i1 true) // CHECK: [[INDEX:%[0-9]+]] = xor i32 [[REVINDEX]], 31 // CHECK: store i32 [[INDEX]], i32* %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -61,7 +61,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[INDEX:%[0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %Mask, i1 true) +// CHECK: [[INDEX:%[0-9]+]] = call i64 @llvm.cttz.i64(i64 %Mask, i1 true) // CHECK: [[TRUNC_INDEX:%[0-9]+]] = trunc i64 [[INDEX]] to i32 // CHECK: store i32 [[TRUNC_INDEX]], i32* %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -76,7 +76,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[REVINDEX:%[0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) +// CHECK: [[REVINDEX:%[0-9]+]] = call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) // CHECK: [[TRUNC_REVINDEX:%[0-9]+]] = trunc i64 [[REVINDEX]] to i32 // CHECK: [[INDEX:%[0-9]+]] = xor i32 [[TRUNC_REVINDEX]], 63 // CHECK: store i32 [[INDEX]], i32* %Index, align 4 diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c --- a/clang/test/CodeGen/ms-intrinsics.c +++ b/clang/test/CodeGen/ms-intrinsics.c @@ -156,7 +156,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[INDEX:%[0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %Mask, i1 true) +// CHECK: [[INDEX:%[0-9]+]] = call i32 @llvm.cttz.i32(i32 %Mask, i1 true) // CHECK: store i32 [[INDEX]], i32* %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -170,7 +170,7 @@ // CHECK: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK: ret i8 [[RESULT]] // CHECK: [[ISNOTZERO_LABEL]]: -// CHECK: [[REVINDEX:%[0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %Mask, i1 true) +// CHECK: [[REVINDEX:%[0-9]+]] = call i32 @llvm.ctlz.i32(i32 %Mask, i1 true) // CHECK: [[INDEX:%[0-9]+]] = xor i32 [[REVINDEX]], 31 // CHECK: store i32 [[INDEX]], i32* %Index, align 4 // CHECK: br label %[[END_LABEL]] @@ -186,7 +186,7 @@ // CHECK-ARM-X64: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK-ARM-X64: ret i8 [[RESULT]] // CHECK-ARM-X64: [[ISNOTZERO_LABEL]]: -// CHECK-ARM-X64: [[INDEX:%[0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %Mask, i1 true) +// CHECK-ARM-X64: [[INDEX:%[0-9]+]] = call i64 @llvm.cttz.i64(i64 %Mask, i1 true) // CHECK-ARM-X64: [[TRUNC_INDEX:%[0-9]+]] = trunc i64 [[INDEX]] to i32 // CHECK-ARM-X64: store i32 [[TRUNC_INDEX]], i32* %Index, align 4 // CHECK-ARM-X64: br label %[[END_LABEL]] @@ -201,7 +201,7 @@ // CHECK-ARM-X64: [[RESULT:%[a-z0-9._]+]] = phi i8 [ 0, %[[ISZERO_LABEL:[a-z0-9._]+]] ], [ 1, %[[ISNOTZERO_LABEL]] ] // CHECK-ARM-X64: ret i8 [[RESULT]] // CHECK-ARM-X64: [[ISNOTZERO_LABEL]]: -// CHECK-ARM-X64: [[REVINDEX:%[0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) +// CHECK-ARM-X64: [[REVINDEX:%[0-9]+]] = call i64 @llvm.ctlz.i64(i64 %Mask, i1 true) // CHECK-ARM-X64: [[TRUNC_REVINDEX:%[0-9]+]] = trunc i64 [[REVINDEX]] to i32 // CHECK-ARM-X64: [[INDEX:%[0-9]+]] = xor i32 [[TRUNC_REVINDEX]], 63 // CHECK-ARM-X64: store i32 [[INDEX]], i32* %Index, 
align 4 diff --git a/clang/test/CodeGen/systemz-inline-asm.c b/clang/test/CodeGen/systemz-inline-asm.c --- a/clang/test/CodeGen/systemz-inline-asm.c +++ b/clang/test/CodeGen/systemz-inline-asm.c @@ -126,6 +126,6 @@ // CHECK: define void @test_f128(fp128* noalias nocapture sret align 8 [[DEST:%.*]], fp128* nocapture readonly %0, fp128* nocapture readonly %1) // CHECK: %f = load fp128, fp128* %0 // CHECK: %g = load fp128, fp128* %1 -// CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g) +// CHECK: [[RESULT:%.*]] = call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g) // CHECK: store fp128 [[RESULT]], fp128* [[DEST]] } diff --git a/clang/test/CodeGenCXX/ARM/exception-alignment.cpp b/clang/test/CodeGenCXX/ARM/exception-alignment.cpp --- a/clang/test/CodeGenCXX/ARM/exception-alignment.cpp +++ b/clang/test/CodeGenCXX/ARM/exception-alignment.cpp @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple armv8-arm-none-eabi -emit-llvm -target-cpu generic -Os -fcxx-exceptions -o - -x c++ %s | FileCheck --check-prefixes=CHECK,A8 %s // RUN: %clang_cc1 -triple armv8-unknown-linux-android -emit-llvm -target-cpu generic -Os -fcxx-exceptions -o - -x c++ %s | FileCheck --check-prefixes=CHECK,A16 %s -// CHECK: [[E:%[A-z0-9]+]] = tail call i8* @__cxa_allocate_exception +// CHECK: [[E:%[A-z0-9]+]] = call i8* @__cxa_allocate_exception // CHECK-NEXT: [[BC:%[A-z0-9]+]] = bitcast i8* [[E]] to <2 x i64>* // A8-NEXT: store <2 x i64> , <2 x i64>* [[BC]], align 8 // A16-NEXT: store <2 x i64> , <2 x i64>* [[BC]], align 16 diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/dynamic-cast.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/dynamic-cast.cpp --- a/clang/test/CodeGenCXX/RelativeVTablesABI/dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/dynamic-cast.cpp @@ -15,7 +15,7 @@ // CHECK-NEXT: br i1 [[isnull]], label %[[dynamic_cast_end:[a-z0-9._]+]], label %[[dynamic_cast_notnull:[a-z0-9._]+]] // CHECK: [[dynamic_cast_notnull]]: // CHECK-NEXT: [[a:%[0-9]+]] = bitcast %class.A* %a to i8* -// CHECK-NEXT: [[as_b:%[0-9]+]] = tail call i8* @__dynamic_cast(i8* nonnull [[a]], i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1B to i8*), i64 0) +// CHECK-NEXT: [[as_b:%[0-9]+]] = call i8* @__dynamic_cast(i8* nonnull [[a]], i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1B to i8*), i64 0) // CHECK-NEXT: [[b:%[0-9]+]] = bitcast i8* [[as_b]] to %class.B* // CHECK-NEXT: br label %[[dynamic_cast_end]] // CHECK: [[dynamic_cast_end]]: diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/member-function-pointer.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/member-function-pointer.cpp --- a/clang/test/CodeGenCXX/RelativeVTablesABI/member-function-pointer.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/member-function-pointer.cpp @@ -17,7 +17,7 @@ // CHECK-NEXT: [[this:%.+]] = bitcast i8* [[this_adj]] to i8** // CHECK-NEXT: [[vtable:%.+]] = load i8*, i8** [[this]], align 8 // CHECK-NEXT: [[offset:%.+]] = add i64 [[fn_ptr]], -1 -// CHECK-NEXT: [[ptr:%.+]] = tail call i8* @llvm.load.relative.i64(i8* [[vtable]], i64 [[offset]]) +// CHECK-NEXT: [[ptr:%.+]] = call i8* @llvm.load.relative.i64(i8* [[vtable]], i64 [[offset]]) // CHECK-NEXT: [[method:%.+]] = bitcast i8* [[ptr]] to void (%class.A*)* // CHECK-NEXT: br label %[[memptr_end:.+]] // CHECK: [[nonvirt]]: diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp --- 
a/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp @@ -40,7 +40,7 @@ // CHECK: [[end]]: // CHECK-NEXT: [[type_info_ptr3:%[0-9]+]] = bitcast %class.A* %a to i8** // CHECK-NEXT: [[vtable:%[a-z0-9]+]] = load i8*, i8** [[type_info_ptr3]] -// CHECK-NEXT: [[type_info_ptr:%[0-9]+]] = tail call i8* @llvm.load.relative.i32(i8* [[vtable]], i32 -4) +// CHECK-NEXT: [[type_info_ptr:%[0-9]+]] = call i8* @llvm.load.relative.i32(i8* [[vtable]], i32 -4) // CHECK-NEXT: [[type_info_ptr2:%[0-9]+]] = bitcast i8* [[type_info_ptr]] to %"class.std::type_info"** // CHECK-NEXT: [[type_info_ptr:%[0-9]+]] = load %"class.std::type_info"*, %"class.std::type_info"** [[type_info_ptr2]], align 8 // CHECK-NEXT: [[name_ptr:%[a-z0-9._]+]] = getelementptr inbounds %"class.std::type_info", %"class.std::type_info"* [[type_info_ptr]], i64 0, i32 1 diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/virtual-function-call.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/virtual-function-call.cpp --- a/clang/test/CodeGenCXX/RelativeVTablesABI/virtual-function-call.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/virtual-function-call.cpp @@ -6,7 +6,7 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[this:%[0-9]+]] = bitcast %class.A* %a to i8** // CHECK-NEXT: %vtable1 = load i8*, i8** [[this]] -// CHECK-NEXT: [[func_ptr:%[0-9]+]] = tail call i8* @llvm.load.relative.i32(i8* %vtable1, i32 0) +// CHECK-NEXT: [[func_ptr:%[0-9]+]] = call i8* @llvm.load.relative.i32(i8* %vtable1, i32 0) // CHECK-NEXT: [[func:%[0-9]+]] = bitcast i8* [[func_ptr]] to void (%class.A*)* // CHECK-NEXT: tail call void [[func]](%class.A* %a) // CHECK-NEXT: ret void diff --git a/clang/test/CodeGenCXX/sizeof-unwind-exception.cpp b/clang/test/CodeGenCXX/sizeof-unwind-exception.cpp --- a/clang/test/CodeGenCXX/sizeof-unwind-exception.cpp +++ b/clang/test/CodeGenCXX/sizeof-unwind-exception.cpp @@ -15,15 +15,15 @@ // PR10789: different platforms have different sizes for struct UnwindException. 
-// X86-64: [[T0:%.*]] = tail call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] +// X86-64: [[T0:%.*]] = call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] // X86-64-NEXT: [[T1:%.*]] = getelementptr i8, i8* [[EXN]], i64 32 -// X86-32: [[T0:%.*]] = tail call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] +// X86-32: [[T0:%.*]] = call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] // X86-32-NEXT: [[T1:%.*]] = getelementptr i8, i8* [[EXN]], i64 32 -// ARM-DARWIN: [[T0:%.*]] = tail call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] +// ARM-DARWIN: [[T0:%.*]] = call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] // ARM-DARWIN-NEXT: [[T1:%.*]] = getelementptr i8, i8* [[EXN]], i64 32 -// ARM-EABI: [[T0:%.*]] = tail call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] +// ARM-EABI: [[T0:%.*]] = call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] // ARM-EABI-NEXT: [[T1:%.*]] = getelementptr i8, i8* [[EXN]], i32 88 -// MIPS: [[T0:%.*]] = tail call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] +// MIPS: [[T0:%.*]] = call i8* @__cxa_begin_catch(i8* [[EXN:%.*]]) [[NUW:#[0-9]+]] // MIPS-NEXT: [[T1:%.*]] = getelementptr i8, i8* [[EXN]], i32 24 // X86-64: attributes [[NUW]] = { nounwind } diff --git a/clang/test/CodeGenObjCXX/arc-cxx11-init-list.mm b/clang/test/CodeGenObjCXX/arc-cxx11-init-list.mm --- a/clang/test/CodeGenObjCXX/arc-cxx11-init-list.mm +++ b/clang/test/CodeGenObjCXX/arc-cxx11-init-list.mm @@ -26,14 +26,14 @@ extern "C" void single() { function({ [I new] }); } -// CHECK: [[INSTANCE:%.*]] = {{.*}} call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) +// CHECK: [[INSTANCE:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) // CHECK-NEXT: [[CAST:%.*]] = bitcast [{{[0-9]+}} x %0*]* %{{.*}} to i8** // CHECK-NEXT: store i8* [[INSTANCE]], i8** [[CAST]], // CHECK: call void @llvm.objc.release(i8* {{.*}}) extern "C" void multiple() { function({ [I new], [I new] }); } -// CHECK: [[INSTANCE:%.*]] = {{.*}} call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) +// CHECK: [[INSTANCE:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) // CHECK-NEXT: [[CAST:%.*]] = bitcast [{{[0-9]+}} x %0*]* %{{.*}} to i8** // CHECK-NEXT: store i8* [[INSTANCE]], i8** [[CAST]], // CHECK: call void @llvm.objc.release(i8* {{.*}}) @@ -56,13 +56,13 @@ external(); } -// CHECK: [[INSTANCE:%.*]] = {{.*}} call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) +// CHECK: [[INSTANCE:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) // CHECK: {{.*}} call void @_Z8externalv() // CHECK: {{.*}} call void @llvm.objc.release(i8* {{.*}}) std::initializer_list il = { [I new] }; -// CHECK: [[POOL:%.*]] = {{.*}} call i8* @llvm.objc.autoreleasePoolPush() -// CHECK: [[INSTANCE:%.*]] = {{.*}} call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) +// CHECK: [[POOL:%.*]] = call i8* @llvm.objc.autoreleasePoolPush() +// CHECK: [[INSTANCE:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* {{.*}}, i8* {{.*}}) // CHECK-NEXT: store i8* [[INSTANCE]], i8** bitcast ([1 x %0*]* @_ZGR2il_ to i8**) // CHECK: {{.*}} call void @llvm.objc.autoreleasePoolPop(i8* [[POOL]]) diff --git 
a/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl b/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl --- a/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl @@ -4,10 +4,10 @@ // CHECK-LABEL: @use_flat_scratch_name kernel void use_flat_scratch_name() { -// CHECK: tail call void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"() +// CHECK: call void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"() __asm__ volatile("s_mov_b64 flat_scratch, 0" : : : "flat_scratch"); -// CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"() +// CHECK: call void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"() __asm__ volatile("s_mov_b32 flat_scratch_lo, 0" : : : "flat_scratch_lo"); // CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_hi, 0", "~{flat_scratch_hi}"() diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -489,9 +489,9 @@ } // CHECK-LABEL: @test_get_group_id( -// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x() -// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y() -// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z() +// CHECK: call i32 @llvm.amdgcn.workgroup.id.x() +// CHECK: call i32 @llvm.amdgcn.workgroup.id.y() +// CHECK: call i32 @llvm.amdgcn.workgroup.id.z() void test_get_group_id(int d, global int *out) { switch (d) { @@ -503,9 +503,9 @@ } // CHECK-LABEL: @test_s_getreg( -// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 0) -// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 1) -// CHECK: tail call i32 @llvm.amdgcn.s.getreg(i32 65535) +// CHECK: call i32 @llvm.amdgcn.s.getreg(i32 0) +// CHECK: call i32 @llvm.amdgcn.s.getreg(i32 1) +// CHECK: call i32 @llvm.amdgcn.s.getreg(i32 65535) void test_s_getreg(volatile global uint *out) { *out = __builtin_amdgcn_s_getreg(0); @@ -514,9 +514,9 @@ } // CHECK-LABEL: @test_get_local_id( -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[$WI_RANGE:![0-9]*]] -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[$WI_RANGE]] -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[$WI_RANGE]] +// CHECK: call i32 @llvm.amdgcn.workitem.id.x(), !range [[$WI_RANGE:![0-9]*]] +// CHECK: call i32 @llvm.amdgcn.workitem.id.y(), !range [[$WI_RANGE]] +// CHECK: call i32 @llvm.amdgcn.workitem.id.z(), !range [[$WI_RANGE]] void test_get_local_id(int d, global int *out) { switch (d) { @@ -614,67 +614,67 @@ } // CHECK-LABEL: @test_alignbit( -// CHECK: tail call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbit(src0, src1, src2); } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } // CHECK-LABEL: @test_ubfe( -// CHECK: tail call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_ubfe(src0, src1, src2); 
} // CHECK-LABEL: @test_sbfe( -// CHECK: tail call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_sbfe(src0, src1, src2); } // CHECK-LABEL: @test_cvt_pkrtz( -// CHECK: tail call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1) +// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1) kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) { *out = __builtin_amdgcn_cvt_pkrtz(src0, src1); } // CHECK-LABEL: @test_cvt_pknorm_i16( -// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1) +// CHECK: call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1) kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) { *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1); } // CHECK-LABEL: @test_cvt_pknorm_u16( -// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1) +// CHECK: call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1) kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) { *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1); } // CHECK-LABEL: @test_cvt_pk_i16( -// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1) +// CHECK: call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1) kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) { *out = __builtin_amdgcn_cvt_pk_i16(src0, src1); } // CHECK-LABEL: @test_cvt_pk_u16( -// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1) +// CHECK: call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1) kernel void test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) { *out = __builtin_amdgcn_cvt_pk_u16(src0, src1); } // CHECK-LABEL: @test_cvt_pk_u8_f32 -// CHECK: tail call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2) kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) { *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2); } // CHECK-LABEL: @test_sad_u8( -// CHECK: tail call i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2) +// CHECK: call i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2) kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_sad_u8(src0, src1, src2); } diff --git a/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl --- a/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-generic-amdgcn.cl @@ -2,20 +2,20 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: @test_builtin_clz( -// CHECK: tail call i32 @llvm.ctlz.i32(i32 %a, i1 true) +// CHECK: call i32 @llvm.ctlz.i32(i32 %a, i1 true) void test_builtin_clz(global int* out, int a) { *out = __builtin_clz(a); } // CHECK-LABEL: @test_builtin_clzl( -// CHECK: tail call i64 @llvm.ctlz.i64(i64 %a, i1 true) +// CHECK: call i64 @llvm.ctlz.i64(i64 %a, i1 true) void test_builtin_clzl(global long* out, long a) { *out = __builtin_clzl(a); } -// CHECK: tail call i8 addrspace(5)* @llvm.frameaddress.p5i8(i32 0) +// CHECK: call i8 addrspace(5)* @llvm.frameaddress.p5i8(i32 0) void test_builtin_frame_address(int *out) { *out = __builtin_frame_address(0); 
} diff --git a/clang/test/CodeGenOpenCL/builtins-r600.cl b/clang/test/CodeGenOpenCL/builtins-r600.cl --- a/clang/test/CodeGenOpenCL/builtins-r600.cl +++ b/clang/test/CodeGenOpenCL/builtins-r600.cl @@ -25,9 +25,9 @@ } // CHECK-LABEL: @test_get_group_id( -// CHECK: tail call i32 @llvm.r600.read.tgid.x() -// CHECK: tail call i32 @llvm.r600.read.tgid.y() -// CHECK: tail call i32 @llvm.r600.read.tgid.z() +// CHECK: call i32 @llvm.r600.read.tgid.x() +// CHECK: call i32 @llvm.r600.read.tgid.y() +// CHECK: call i32 @llvm.r600.read.tgid.z() void test_get_group_id(int d, global int *out) { switch (d) { @@ -39,9 +39,9 @@ } // CHECK-LABEL: @test_get_local_id( -// CHECK: tail call i32 @llvm.r600.read.tidig.x(), !range [[WI_RANGE:![0-9]*]] -// CHECK: tail call i32 @llvm.r600.read.tidig.y(), !range [[WI_RANGE]] -// CHECK: tail call i32 @llvm.r600.read.tidig.z(), !range [[WI_RANGE]] +// CHECK: call i32 @llvm.r600.read.tidig.x(), !range [[WI_RANGE:![0-9]*]] +// CHECK: call i32 @llvm.r600.read.tidig.y(), !range [[WI_RANGE]] +// CHECK: call i32 @llvm.r600.read.tidig.z(), !range [[WI_RANGE]] void test_get_local_id(int d, global int *out) { switch (d) { diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -33,14 +33,14 @@ // CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]] // CHECK: [[if_then]]: -// CHECK: tail call spir_func void @f() -// CHECK: tail call spir_func void @non_convfun() +// CHECK: call spir_func void @f() +// CHECK: call spir_func void @non_convfun() // CHECK: tail call spir_func void @g() // CHECK: br label %[[if_end3:.+]] // CHECK: [[if_end3_critedge]]: -// CHECK: tail call spir_func void @non_convfun() +// CHECK: call spir_func void @non_convfun() // CHECK: br label %[[if_end3]] // CHECK: [[if_end3]]: @@ -65,13 +65,13 @@ // CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0 // CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]] // CHECK: [[if_then]]: -// CHECK: tail call spir_func void @f() +// CHECK: call spir_func void @f() // CHECK-NOT: call spir_func void @convfun() // CHECK-NOT: call spir_func void @g() // CHECK: br label %[[if_end]] // CHECK: [[if_end]]: // CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ] -// CHECK: tail call spir_func void @convfun() #[[attr4:.+]] +// CHECK: call spir_func void @convfun() #[[attr4:.+]] // CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]] // CHECK: [[if_then2]]: // CHECK: tail call spir_func void @g() @@ -93,16 +93,16 @@ // Test loop is unrolled for convergent function. 
// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1 -// CHECK: tail call spir_func void @convfun() #[[attr4:[0-9]+]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] -// CHECK: tail call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4:[0-9]+]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] +// CHECK: call spir_func void @convfun() #[[attr4]] // CHECK-LABEL: ret void void test_unroll() { @@ -116,7 +116,7 @@ // CHECK: [[for_cond_cleanup:.+]]: // CHECK: ret void // CHECK: [[for_body]]: -// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]] +// CHECK: call spir_func void @nodupfun() #[[attr5:[0-9]+]] // CHECK-NOT: call spir_func void @nodupfun() // The new PM produces a slightly different IR for the loop from the legacy PM, diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl --- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl +++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl @@ -8,7 +8,7 @@ // CHECK: define spir_kernel void @foo(i32 addrspace(1)* %A) { int id = get_dummy_id(0); - // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 0) + // CHECK: %{{[a-z0-9_]+}} = call spir_func i32 @get_dummy_id(i32 0) A[id] = id; bar(A); // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* %A) diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -89,16 +89,6 @@ STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); -/// Scan the specified function for alloca instructions. -/// If it contains any dynamic allocas, returns false. -static bool canTRE(Function &F) { - // Because of PR962, we don't TRE dynamic allocas. - return llvm::all_of(instructions(F), [](Instruction &I) { - auto *AI = dyn_cast<AllocaInst>(&I); - return !AI || AI->isStaticAlloca(); - }); -} - namespace { struct AllocaDerivedValueTracker { // Start at a root value and walk its use-def chain to mark calls that use the @@ -185,11 +175,144 @@ }; } -static bool markTails(Function &F, bool &AllCallsAreTailCalls, - OptimizationRemarkEmitter *ORE) { +// Debug info intrinsics, lifetime end or assume intrinsic +// should not stop tail call optimization.
+static bool canBeIgnoredForTailCall(Instruction *I) { + if (isa<DbgInfoIntrinsic>(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::assume) + return true; + + return false; +} + +/// Returns true if it is safe to move the specified +/// instruction from after the call to before the call, assuming that all +/// instructions between the call and this instruction are movable. +static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { + if (canBeIgnoredForTailCall(I)) + return true; + + // FIXME: We can move load/store/call/free instructions above the call if the + // call does not mod/ref the memory location being processed. + if (I->mayHaveSideEffects()) // This also handles volatile loads. + return false; + + if (LoadInst *L = dyn_cast<LoadInst>(I)) { + // Loads may always be moved above calls without side effects. + if (CI->mayHaveSideEffects()) { + // Non-volatile loads may be moved above a call with side effects if it + // does not write to memory and the load provably won't trap. + // Writes to memory only matter if they may alias the pointer + // being loaded from. + const DataLayout &DL = L->getModule()->getDataLayout(); + if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || + !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(), + L->getAlign(), DL, L)) + return false; + } + } + + // Otherwise, if this is a side-effect free instruction, check to make sure + // that it does not use the return value of the call. If it doesn't use the + // return value of the call, it must only use things that are defined before + // the call, or movable instructions between the call and the instruction + // itself. + return !is_contained(I->operands(), CI); +} + +/// Returns true if it is a noop bitcast +/// (i.e. it casts a pointer to a pointer). +static bool isNoopBitcast(Instruction *I) { + if (BitCastInst *BitCast = dyn_cast<BitCastInst>(I)) { + Type *T1 = BitCast->getType(); + Type *T2 = BitCast->getOperand(0)->getType(); + + return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()); + } + + return false; +} + +/// Returns true if this is a block with a "return", an "unreachable", +/// or an "unconditional branch" to another block with a "return" +/// instruction. RetValue keeps the return value if applicable. +static bool isTailBlock(BasicBlock *BB, Value*& RetValue) { + Instruction *LastBlockInstr = BB->getTerminator(); + RetValue = nullptr; + + // Check that the last instruction is a "return", an "unreachable", + // or an unconditional branch to another block containing a "return". + if (ReturnInst* Ret = dyn_cast<ReturnInst>(LastBlockInstr)) { + RetValue = Ret->getReturnValue(); + return true; + } + + if (isa<UnreachableInst>(LastBlockInstr)) + return true; + + if (BranchInst *Branch = dyn_cast<BranchInst>(LastBlockInstr)) { + if (Branch->isUnconditional()) + if(ReturnInst* Ret = dyn_cast<ReturnInst>(Branch->getSuccessor(0)->getFirstNonPHIOrDbg())) { + RetValue = Ret->getReturnValue(); + return true; + } + } + + return false; +} + +/// Checks that the specified call instruction is in a chain of recursive +/// calls before the return. +static bool areAllLastFuncCallRecursive(CallInst *Inst, Function &F) { + BasicBlock::iterator BBI(Inst->getParent()->getTerminator()); + for (--BBI; &*BBI != Inst; --BBI) { + if (CallInst *CI = dyn_cast<CallInst>(&*BBI)) + if (!canBeIgnoredForTailCall(CI) && CI->getCalledFunction() != &F) + return false; + } + + return true; +} + +/// Checks that the call position is suitable for tailcall optimization.
+static bool isInTailCallPosition(CallInst *TailCallCandidate, + AliasAnalysis *AA) { + Value* RetValue = nullptr; + if (!isTailBlock(TailCallCandidate->getParent(), RetValue)) + return false; + + // Check that the instruction chain between the call and the return + // contains only noops or instructions that can be moved. + BasicBlock::iterator BBI(TailCallCandidate->getParent()->getTerminator()); + for (--BBI; &*BBI != TailCallCandidate; --BBI) { + if (canMoveAboveCall(&*BBI, TailCallCandidate, AA)) + continue; + else if (isNoopBitcast(&*BBI)) + continue; + else + return false; + } + + // Check that the return value matches TailCallCandidate or is void. + if (RetValue != nullptr && RetValue != TailCallCandidate) { + if (BitCastInst *BitCast = dyn_cast<BitCastInst>(RetValue)) + return (isNoopBitcast(BitCast) && BitCast->getOperand(0) == TailCallCandidate); + + // Return operand does not match the tailcall candidate. + return false; + } + + return true; +} + +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE, + AliasAnalysis *AA) { if (F.callsFunctionThatReturnsTwice()) return false; - AllCallsAreTailCalls = true; // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; @@ -237,7 +360,8 @@ Escaped = ESCAPED; CallInst *CI = dyn_cast<CallInst>(&I); - if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I)) + + if (!CI || CI->isTailCall() || !isInTailCallPosition(CI, AA)) continue; bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); @@ -272,11 +396,8 @@ } } - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) DeferredTails.push_back(CI); - } else { - AllCallsAreTailCalls = false; - } } for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { @@ -313,47 +434,12 @@ LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; - } else { - AllCallsAreTailCalls = false; } } return Modified; } -/// Return true if it is safe to move the specified -/// instruction from after the call to before the call, assuming that all -/// instructions between the call and this instruction are movable. -/// -static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { - // FIXME: We can move load/store/call/free instructions above the call if the - // call does not mod/ref the memory location being processed. - if (I->mayHaveSideEffects()) // This also handles volatile loads. - return false; - - if (LoadInst *L = dyn_cast<LoadInst>(I)) { - // Loads may always be moved above calls without side effects. - if (CI->mayHaveSideEffects()) { - // Non-volatile loads may be moved above a call with side effects if it - // does not write to memory and the load provably won't trap. - // Writes to memory only matter if they may alias the pointer - // being loaded from. - const DataLayout &DL = L->getModule()->getDataLayout(); - if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || - !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(), - L->getAlign(), DL, L)) - return false; - } - } - - // Otherwise, if this is a side-effect free instruction, check to make sure - // that it does not use the return value of the call. If it doesn't use the - // return value of the call, it must only use things that are defined before - // the call, or movable instructions between the call and the instruction - // itself.
- return !is_contained(I->operands(), CI); -} - static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { if (!I->isAssociative() || !I->isCommutative()) return false; @@ -392,7 +478,6 @@ // createTailRecurseLoopHeader the first time we find a call we can eliminate. BasicBlock *HeaderBB = nullptr; SmallVector<PHINode *, 8> ArgumentPHIs; - bool RemovableCallsMustBeMarkedTail = false; // PHI node to store our return value. PHINode *RetPN = nullptr; @@ -419,8 +504,7 @@ DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(Instruction *TI, - bool CannotTailCallElimCallsMarkedTail); + CallInst *findTRECandidate(Instruction *TI); void createTailRecurseLoopHeader(CallInst *CI); @@ -428,14 +512,14 @@ bool eliminateCall(CallInst *CI); - bool foldReturnAndProcessPred(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool foldReturnAndProcessPred(ReturnInst *Ret); - bool processReturningBlock(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool processReturningBlock(ReturnInst *Ret); void cleanupAndFinalize(); + bool canTRE(Function &F); + public: static bool eliminate(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, @@ -443,8 +527,7 @@ }; } // namespace -CallInst *TailRecursionEliminator::findTRECandidate( - Instruction *TI, bool CannotTailCallElimCallsMarkedTail) { +CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { BasicBlock *BB = TI->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. @@ -464,11 +547,6 @@ --BBI; } - // If this call is marked as a tail call, and if there are dynamic allocas in - // the function, we cannot perform this optimization. - if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return nullptr; - // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // and disable this xform in this case, because the code generator will @@ -498,26 +576,15 @@ BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); - // If this function has self recursive calls in the tail position where some - // are marked tail and some are not, only transform one flavor or another. - // We have to choose whether we move allocas in the entry block to the new - // entry block or not, so we can't make a good choice for both. We make this - // decision here based on whether the first call we found to remove is - // marked tail. - // NOTE: We could do slightly better here in the case that the function has - // no entry block allocas. - RemovableCallsMustBeMarkedTail = CI->isTailCall(); - - // If this tail call is marked 'tail' and if there are any allocas in the + // If there are any allocas in the // entry block, move them up to the new entry block. - if (RemovableCallsMustBeMarkedTail) - // Move all fixed sized allocas from HeaderBB to NewEntry. - for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), - NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) - if (isa<ConstantInt>(AI->getArraySize())) - AI->moveBefore(&*NEBI); + // Move all fixed sized allocas from HeaderBB to NewEntry.
+ for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) + if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++)) + if (isa<ConstantInt>(AI->getArraySize())) + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. @@ -620,9 +687,6 @@ if (!HeaderBB) createTailRecurseLoopHeader(CI); - if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) - return false; - // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. @@ -672,8 +736,7 @@ return true; } -bool TailRecursionEliminator::foldReturnAndProcessPred( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { +bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { BasicBlock *BB = Ret->getParent(); bool Change = false; @@ -698,8 +761,7 @@ while (!UncondBranchPreds.empty()) { BranchInst *BI = UncondBranchPreds.pop_back_val(); BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = - findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { + if (CallInst *CI = findTRECandidate(BI)) { LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); @@ -720,9 +782,8 @@ return Change; } -bool TailRecursionEliminator::processReturningBlock( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { - CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); +bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) { + CallInst *CI = findTRECandidate(Ret); if (!CI) return false; @@ -810,23 +871,18 @@ return false; bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); - if (!AllCallsAreTailCalls) - return MadeChange; + MadeChange |= markTails(F, ORE, AA); // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return MadeChange; - // If false, we cannot perform TRE on tail calls marked with the 'tail' - // attribute, because doing so would cause the stack size to increase (real - // TRE would deallocate variable sized allocas, TRE doesn't). - bool CanTRETailMarkedCall = canTRE(F); - TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); + if (!TRE.canTRE(F)) + return MadeChange; + // Change any tail recursive calls to loops. // // FIXME: The code generator produces really bad code when an 'escaping @@ -836,9 +892,9 @@ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { - bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); + bool Change = TRE.processReturningBlock(Ret); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); + Change = TRE.foldReturnAndProcessPred(Ret); MadeChange |= Change; } } @@ -848,6 +904,51 @@ return MadeChange; } +bool TailRecursionEliminator::canTRE(Function &F) { + // The local stack holds all alloca instructions and all byval arguments.
+ AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) + Tracker.walk(AI); + } + + // Do not do TRE if any pointer to the local stack has escaped. + if (!Tracker.EscapePoints.empty()) + return false; + + return !llvm::any_of(instructions(F), [&](Instruction &I) { + // Because of PR962, we don't TRE dynamic allocas. + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + if (AI && !AI->isStaticAlloca()) + return true; + } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { + if (CI->getCalledFunction() == &F) { + // Do not do TRE if explicitly marked as NoTailcall or has Operand + // Bundles. + if (CI->isNoTailCall() || CI->hasOperandBundles()) + return true; + + // Do not do TRE if there are recursive calls that are not last calls. + Value* RetValue = nullptr; + if (!isTailBlock(CI->getParent(), RetValue) || + !areAllLastFuncCallRecursive(CI, F)) + return true; + + // Do not do TRE if the recursive call receives a pointer to the local stack. + if (Tracker.AllocaUsers.count(CI) > 0) + return true; + } + } + + return false; + }); +} + namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -55,22 +55,22 @@ } ; GCN: define amdgpu_kernel void @test_inliner( -; GCN-INL1: %c1 = tail call coldcc float @foo( +; GCN-INL1: %c1 = call coldcc float @foo( ; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00 ; GCN: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c ; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i ; GCN: call void @foo_noinline( -; GCN: tail call float @_Z3sinf( +; GCN: call float @_Z3sinf( define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) { entry: %pvt_arr = alloca [64 x float], align 4, addrspace(5) - %tid = tail call i32 @llvm.amdgcn.workitem.id.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid %tmp2 = load float, float addrspace(1)* %arrayidx, align 4 %add = add i32 %tid, 1 %arrayidx2 = getelementptr inbounds float, float addrspace(1)* %a, i32 %add %tmp5 = load float, float addrspace(1)* %arrayidx2, align 4 - %c1 = tail call coldcc float @foo(float %tmp2, float %tmp5) + %c1 = call coldcc float @foo(float %tmp2, float %tmp5) %or = or i32 %tid, %n %arrayidx5 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or store float %c1, float addrspace(5)* %arrayidx5, align 4 @@ -98,7 +98,7 @@ entry: %pvt_arr1 = alloca [32 x float], align 4, addrspace(5) %pvt_arr2 = alloca [32 x float], align 4, addrspace(5) - %tid = tail call i32 @llvm.amdgcn.workitem.id.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid %or = or i32 %tid, %n %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or @@ -125,7 +125,7 @@ entry: %pvt_arr1 = alloca [32 x float], align 4, addrspace(5) %pvt_arr2 = alloca [33 x float], align 4, addrspace(5) - %tid = tail call i32 @llvm.amdgcn.workitem.id.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid %or = or i32 %tid, %n %arrayidx4 = getelementptr inbounds [32 x float],
[32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-transforms.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-transforms.ll --- a/llvm/test/CodeGen/BPF/CORE/intrinsic-transforms.ll +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-transforms.ll @@ -55,8 +55,8 @@ %call3 = call i32 @get_value(i32* %9), !dbg !26 br label %cond.end5, !dbg !24 -; CHECK: tail call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 4), !dbg !{{[0-9]+}}, !llvm.preserve.access.index !{{[0-9]+}} -; CHECK-NOT: tail call i32* @llvm.preserve.array.access.index +; CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 4), !dbg !{{[0-9]+}}, !llvm.preserve.access.index !{{[0-9]+}} +; CHECK-NOT: call i32* @llvm.preserve.array.access.index cond.false4: ; preds = %cond.end br label %cond.end5, !dbg !24 diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll --- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll @@ -31,7 +31,7 @@ ; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds i8, i8* %buffer, i64 8 ; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[T0]] to i32* ; CHECK-NEXT: store i32 %n, i32* [[T1]], align 4 -; CHECK-NEXT: [[ALLOC:%.*]] = tail call i8* @allocate(i32 %n) +; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @allocate(i32 %n) ; CHECK-NEXT: [[T1:%.*]] = bitcast i8* %buffer to i8** ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T1]], align 8 ; CHECK-NEXT: [[T0:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[ALLOC]], 1 @@ -43,7 +43,7 @@ ; CHECK-NEXT: : ; CHECK-NEXT: [[T1:%.*]] = bitcast i8* %0 to i8** ; CHECK-NEXT: [[ALLOC:%.*]] = load i8*, i8** [[T1]], align 8 -; CHECK-NEXT: tail call void @deallocate(i8* [[ALLOC]]) +; CHECK-NEXT: call void @deallocate(i8* [[ALLOC]]) ; CHECK-NEXT: br i1 %1, declare {i8*, i32} @prototype_g(i8*, i1) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll @@ -331,16 +331,16 @@ ; CHECK-LABEL: @fmin_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P:%.*]], align 4, !tbaa !7 -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP0]], float 0x47EFFFFFE0000000) +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[TMP0]], float 0x47EFFFFFE0000000) ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[P]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX_1]], align 4, !tbaa !7 -; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP2]], float [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.minnum.f32(float [[TMP2]], float [[TMP1]]) ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX_2]], align 4, !tbaa !7 -; CHECK-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP4]], float [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.minnum.f32(float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 ; CHECK-NEXT: [[TMP6:%.*]] = load 
float, float* [[ARRAYIDX_3]], align 4, !tbaa !7 -; CHECK-NEXT: [[TMP7:%.*]] = tail call fast float @llvm.minnum.f32(float [[TMP6]], float [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.minnum.f32(float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: ret float [[TMP7]] ; entry: diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll --- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -44,7 +44,7 @@ ; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR:%.*]] to i64 ; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 ; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; ASSUMPTIONS-ON-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 diff --git a/llvm/test/Transforms/TailCallElim/accum_recursion.ll b/llvm/test/Transforms/TailCallElim/accum_recursion.ll --- a/llvm/test/Transforms/TailCallElim/accum_recursion.ll +++ b/llvm/test/Transforms/TailCallElim/accum_recursion.ll @@ -59,9 +59,9 @@ bb1: %0 = add i64 %n, -1 - %recurse1 = tail call i64 @test3_fib(i64 %0) nounwind + %recurse1 = call i64 @test3_fib(i64 %0) nounwind %1 = add i64 %n, -2 - %recurse2 = tail call i64 @test3_fib(i64 %1) nounwind + %recurse2 = call i64 @test3_fib(i64 %1) nounwind %accumulate = add nsw i64 %recurse2, %recurse1 ret i64 %accumulate diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -211,13 +211,17 @@ ; If an alloca is passed byval it is not a use of the alloca or an escape ; point, and both calls below can be marked tail. -define void @test13() { +define void @test13(i1 %cond) { ; CHECK-LABEL: @test13 ; CHECK: tail call void @bar(%struct.foo* byval %f) ; CHECK: tail call void @bar(%struct.foo* null) entry: %f = alloca %struct.foo + br i1 %cond, label %cond_true, label %cond_false +cond_true: call void @bar(%struct.foo* byval %f) + ret void +cond_false: call void @bar(%struct.foo* null) ret void } @@ -245,5 +249,19 @@ ret void } +; If one call followed by another call then the first one +; should not be marked as a tailcall. +define void @test16(i32 %X) { +; CHECK-NOT: tail call void @noarg() +; CHECK: add i32 %X +; CHECK-NEXT: tail call void @noarg() +; CHECK-NEXT: ret + call void @noarg() + %DUMMY = add i32 %X, 1 + call void @noarg() + ret void +} + + declare void @bar(%struct.foo* byval) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
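As an illustration of the markTails/isInTailCallPosition behaviour introduced above, here is a small IR sketch written for this note; it is not part of the patch or its tests, and the names @count and @use are made up. The recursive call is followed only by the return of its value, so it is accepted as a tail-call position and remains a TRE candidate, while the call to @use is followed by an add that consumes its result, so canMoveAboveCall rejects it and it is left unmarked.

; Illustrative sketch only (assumed functions @count and @use).
define i32 @count(i32 %n) {
entry:
  %iszero = icmp eq i32 %n, 0
  br i1 %iszero, label %base, label %recurse

recurse:
  ; Tail position: nothing but the return of %r follows the recursive call,
  ; so markTails may add the 'tail' marker and TRE can turn it into a loop.
  %n1 = sub i32 %n, 1
  %r = call i32 @count(i32 %n1)
  ret i32 %r

base:
  ; Not a tail position: the add below uses the call's result before the
  ; return, so the call stays a plain 'call'.
  %v = call i32 @use(i32 %n)
  %v1 = add i32 %v, 1
  ret i32 %v1
}

declare i32 @use(i32)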