Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp
@@ -2895,16 +2895,19 @@
     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
   }
   case NEON::BI__builtin_neon_vld1_v:
-  case NEON::BI__builtin_neon_vld1q_v:
+  case NEON::BI__builtin_neon_vld1q_v: {
+    llvm::Type *Tys[] = {Ty, Int8PtrTy};
     Ops.push_back(getAlignmentValue32(PtrOp0));
-    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
+    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
+  }
   case NEON::BI__builtin_neon_vld2_v:
   case NEON::BI__builtin_neon_vld2q_v:
   case NEON::BI__builtin_neon_vld3_v:
   case NEON::BI__builtin_neon_vld3q_v:
   case NEON::BI__builtin_neon_vld4_v:
   case NEON::BI__builtin_neon_vld4q_v: {
-    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+    llvm::Type *Tys[] = {Ty, Int8PtrTy};
+    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
     Value *Align = getAlignmentValue32(PtrOp1);
     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -2927,7 +2930,8 @@
   case NEON::BI__builtin_neon_vld3q_lane_v:
   case NEON::BI__builtin_neon_vld4_lane_v:
   case NEON::BI__builtin_neon_vld4q_lane_v: {
-    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+    llvm::Type *Tys[] = {Ty, Int8PtrTy};
+    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
     for (unsigned I = 2; I < Ops.size() - 1; ++I)
       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
     Ops.push_back(getAlignmentValue32(PtrOp1));
@@ -3046,9 +3050,11 @@
   case NEON::BI__builtin_neon_vst3_lane_v:
   case NEON::BI__builtin_neon_vst3q_lane_v:
   case NEON::BI__builtin_neon_vst4_lane_v:
-  case NEON::BI__builtin_neon_vst4q_lane_v:
+  case NEON::BI__builtin_neon_vst4q_lane_v: {
+    llvm::Type *Tys[] = {Int8PtrTy, Ty};
     Ops.push_back(getAlignmentValue32(PtrOp0));
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
+  }
   case NEON::BI__builtin_neon_vsubhn_v: {
     llvm::VectorType *SrcTy =
         llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -3776,7 +3782,8 @@
       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
       // Load the value as a one-element vector.
       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
-      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
+      llvm::Type *Tys[] = {Ty, Int8PtrTy};
+      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
       Value *Align = getAlignmentValue32(PtrOp0);
       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
       // Combine them.
@@ -3808,7 +3815,8 @@
         break;
       default: llvm_unreachable("unknown vld_dup intrinsic?");
       }
-      Function *F = CGM.getIntrinsic(Int, Ty);
+      llvm::Type *Tys[] = {Ty, Int8PtrTy};
+      Function *F = CGM.getIntrinsic(Int, Tys);
       llvm::Value *Align = getAlignmentValue32(PtrOp1);
       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
@@ -3827,7 +3835,8 @@
       break;
     default: llvm_unreachable("unknown vld_dup intrinsic?");
     }
-    Function *F = CGM.getIntrinsic(Int, Ty);
+    llvm::Type *Tys[] = {Ty, Int8PtrTy};
+    Function *F = CGM.getIntrinsic(Int, Tys);
     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

     SmallVector<Value*, 6> Args;
@@ -3902,8 +3911,9 @@
       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
       Ops[2] = getAlignmentValue32(PtrOp0);
+      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
-                                                 Ops[1]->getType()), Ops);
+                                                 Tys), Ops);
     }
     // fall through
   case NEON::BI__builtin_neon_vst1_lane_v: {
Index: cfe/trunk/test/CodeGen/arm-neon-misc.c
===================================================================
--- cfe/trunk/test/CodeGen/arm-neon-misc.c
+++ cfe/trunk/test/CodeGen/arm-neon-misc.c
@@ -14,20 +14,20 @@
 void t1(uint64_t *src, uint8_t *dst) {
 // CHECK: @t1
   uint64x2_t q = vld1q_u64(src);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
   vst1q_lane_u64(dst, q, 1);
 // CHECK: bitcast <16 x i8> %{{.*}} to <2 x i64>
 // CHECK: shufflevector <2 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.v1i64
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64
 }

 void t2(uint64_t *src1, uint8_t *src2, uint64x2_t *dst) {
 // CHECK: @t2
   uint64x2_t q = vld1q_u64(src1);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
   q = vld1q_lane_u64(src2, q, 0);
 // CHECK: shufflevector <2 x i64>
-// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64
+// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8
 // CHECK: shufflevector <1 x i64>
   *dst = q;
 // CHECK: store <2 x i64>
Index: cfe/trunk/test/CodeGen/arm-vector-align.c
===================================================================
--- cfe/trunk/test/CodeGen/arm-vector-align.c
+++ cfe/trunk/test/CodeGen/arm-vector-align.c
@@ -14,9 +14,9 @@
 typedef float AlignedAddr __attribute__ ((aligned (16)));
 void t1(AlignedAddr *addr1, AlignedAddr *addr2) {
 // CHECK: @t1
-// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %{{.*}}, i32 16)
+// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %{{.*}}, i32 16)
   float32x4_t a = vld1q_f32(addr1);
-// CHECK: call void @llvm.arm.neon.vst1.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
   vst1q_f32(addr2, a);
 }

Index: cfe/trunk/test/CodeGen/vld_dup.c
===================================================================
--- cfe/trunk/test/CodeGen/vld_dup.c
+++ cfe/trunk/test/CodeGen/vld_dup.c
@@ -14,7 +14,7 @@
     int64_t v7[4];

     v1 = vld3_dup_s32(v0);
-// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
 // CHECK-NEXT: [[T169:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], 0
 // CHECK-NEXT: [[T170:%.*]] = shufflevector <2 x i32> [[T169]], <2 x i32> [[T169]], <2 x i32> zeroinitializer
 // CHECK-NEXT: [[T171:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], <2 x i32> [[T170]], 0
@@ -26,7 +26,7 @@
 // CHECK-NEXT: [[T177:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T174]], <2 x i32> [[T176]], 2

     v3 = vld4_dup_s32(v2);
-// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
 // CHECK-NEXT: [[T179:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], 0
 // CHECK-NEXT: [[T180:%.*]] = shufflevector <2 x i32> [[T179]], <2 x i32> [[T179]], <2 x i32> zeroinitializer
 // CHECK-NEXT: [[T181:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], <2 x i32> [[T180]], 0
@@ -41,10 +41,10 @@
 // CHECK-NEXT: [[T190:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T187]], <2 x i32> [[T189]], 3

     v4 = vld3_dup_s64(v6);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})

     v5 = vld4_dup_s64(v7);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})

     return 0;
 }
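Note on the mechanism (commentary, not part of the patch): every hunk above makes the same mechanical change. CGM.getIntrinsic previously received only the overloaded vector type, so the declared NEON load/store intrinsics were mangled as, e.g., @llvm.arm.neon.vld1.v2i64. Passing the i8* pointer type as an additional overload parameter makes the declarations match intrinsic definitions that are also overloaded on their pointer operand, which is why every FileCheck pattern in the tests gains a .p0i8 component; for stores the pointer type is listed first, hence @llvm.arm.neon.vst1.p0i8.v4f32. Below is a minimal sketch of the same call outside CGBuiltin.cpp, assuming a 2015-era LLVM C++ API; declareVld1 is a hypothetical helper, and Intrinsic::getDeclaration is the entry point that CodeGenModule::getIntrinsic wraps:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Declares the ARM NEON vld1 intrinsic for a <2 x i64> result loaded
  // through an i8*. With only the vector type in the overload list, the
  // mangled name would be @llvm.arm.neon.vld1.v2i64; with both types it
  // becomes @llvm.arm.neon.vld1.v2i64.p0i8, matching the updated CHECKs.
  Function *declareVld1(Module &M) {
    LLVMContext &C = M.getContext();
    Type *VecTy = VectorType::get(Type::getInt64Ty(C), 2); // <2 x i64>
    Type *I8PtrTy = Type::getInt8PtrTy(C);                 // i8*
    Type *Tys[] = {VecTy, I8PtrTy}; // {result type, pointer type}
    return Intrinsic::getDeclaration(&M, Intrinsic::arm_neon_vld1, Tys);
  }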