Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -467,13 +467,11 @@ } }; - struct DestroyNRVOVariable final : EHScopeStack::Cleanup { - DestroyNRVOVariable(Address addr, - const CXXDestructorDecl *Dtor, - llvm::Value *NRVOFlag) - : Dtor(Dtor), NRVOFlag(NRVOFlag), Loc(addr) {} + template + struct DestroyNRVOVariable : EHScopeStack::Cleanup { + DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag) + : NRVOFlag(NRVOFlag), Loc(addr) {} - const CXXDestructorDecl *Dtor; llvm::Value *NRVOFlag; Address Loc; @@ -492,12 +490,39 @@ CGF.EmitBlock(RunDtorBB); } + static_cast(this)->emitDestructorCall(CGF); + + if (NRVO) CGF.EmitBlock(SkipDtorBB); + } + + virtual ~DestroyNRVOVariable() = default; + }; + + struct DestroyNRVOVariableCXX final + : DestroyNRVOVariable { + DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor, + llvm::Value *NRVOFlag) + : DestroyNRVOVariable(addr, NRVOFlag), + Dtor(Dtor) {} + + const CXXDestructorDecl *Dtor; + + void emitDestructorCall(CodeGenFunction &CGF) { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, - Loc); + /*Delegating=*/false, Loc); + } + }; - if (NRVO) CGF.EmitBlock(SkipDtorBB); + struct DestroyNRVOVariableC final + : DestroyNRVOVariable { + DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty) + : DestroyNRVOVariable(addr, NRVOFlag), Ty(Ty) {} + + QualType Ty; + + void emitDestructorCall(CodeGenFunction &CGF) { + CGF.destroyNonTrivialCStruct(CGF, Loc, Ty); } }; @@ -1088,7 +1113,10 @@ address = ReturnValue; if (const RecordType *RecordTy = Ty->getAs()) { - if (!cast(RecordTy->getDecl())->hasTrivialDestructor()) { + const auto *RD = RecordTy->getDecl(); + const auto *CXXRD = dyn_cast(RD); + if ((CXXRD && !CXXRD->hasTrivialDestructor()) || + RD->isNonTrivialToPrimitiveDestroy()) { // Create a flag that is used to indicate when the NRVO was applied // to this variable. Set it to zero to indicate that NRVO was not // applied. @@ -1461,8 +1489,8 @@ if (emission.NRVOFlag) { assert(!type->isArrayType()); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); - EHStack.pushCleanup(cleanupKind, addr, - dtor, emission.NRVOFlag); + EHStack.pushCleanup(cleanupKind, addr, dtor, + emission.NRVOFlag); return; } break; @@ -1484,6 +1512,12 @@ case QualType::DK_nontrivial_c_struct: destroyer = CodeGenFunction::destroyNonTrivialCStruct; + if (emission.NRVOFlag) { + assert(!type->isArrayType()); + EHStack.pushCleanup(cleanupKind, addr, + emission.NRVOFlag, type); + return; + } break; } Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -12713,8 +12713,8 @@ // Try to apply the named return value optimization. We have to check // if we can do this here because lambdas keep return statements around // to deduce an implicit return type. - if (getLangOpts().CPlusPlus && FD->getReturnType()->isRecordType() && - !FD->isDependentContext()) + if (FD->getReturnType()->isRecordType() && + (!getLangOpts().CPlusPlus || !FD->isDependentContext())) computeNRVO(Body, getCurFunction()); } Index: lib/Sema/SemaStmt.cpp =================================================================== --- lib/Sema/SemaStmt.cpp +++ lib/Sema/SemaStmt.cpp @@ -2872,9 +2872,6 @@ /// NRVO, or NULL if there is no such candidate. VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType, Expr *E, CopyElisionSemanticsKind CESK) { - if (!getLangOpts().CPlusPlus) - return nullptr; - // - in a return statement in a function [where] ... // ... the expression is the name of a non-volatile automatic object ... DeclRefExpr *DR = dyn_cast(E->IgnoreParens()); Index: test/CodeGen/64bit-swiftcall.c =================================================================== --- test/CodeGen/64bit-swiftcall.c +++ test/CodeGen/64bit-swiftcall.c @@ -108,9 +108,7 @@ TEST(struct_1); // CHECK-LABEL: define swiftcc { i64, i64 } @return_struct_1() {{.*}}{ // CHECK: [[RET:%.*]] = alloca [[STRUCT1:%.*]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[STRUCT1]], align 4 // CHECK: call void @llvm.memset -// CHECK: call void @llvm.memcpy // CHECK: [[CAST:%.*]] = bitcast [[STRUCT1]]* %retval to { i64, i64 }* // CHECK: [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, i64* [[GEP0]], align 4 @@ -158,12 +156,8 @@ TEST(struct_2); // CHECK-LABEL: define swiftcc { i64, i64 } @return_struct_2() {{.*}}{ // CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[VAR]] +// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[RET]] // CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[CASTVAR]], {{.*}}[[STRUCT2_RESULT]] -// CHECK: [[CASTRET:%.*]] = bitcast {{.*}} [[RET]] -// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[VAR]] -// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[CASTRET]], {{.*}}[[CASTVAR]] // CHECK: [[CAST:%.*]] = bitcast [[STRUCT2_TYPE]]* [[RET]] to { i64, i64 }* // CHECK: [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, i64* [[GEP0]], align 4 @@ -214,12 +208,8 @@ TEST(struct_misaligned_1) // CHECK-LABEL: define swiftcc i64 @return_struct_misaligned_1() // CHECK: [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1 -// CHECK: [[RES:%.*]] = alloca [[STRUCT]], align 1 -// CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* +// CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* // CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5 -// CHECK: [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* -// CHECK: [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* align 1 [[CASTRET]], i8* align 1 [[CASTRES]], i64 5 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 1 @@ -267,12 +257,8 @@ TEST(union_het_fp) // CHECK-LABEL: define swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: [[RES:%.*]] = alloca [[UNION]], align 8 -// CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8* +// CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to i8* // CHECK: call void @llvm.memcpy{{.*}}(i8* align 8 [[CAST]] -// CHECK: [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8* -// CHECK: [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* align 8 [[CASTRET]], i8* align 8 [[CASTRES]] // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 8 Index: test/CodeGen/aarch64-neon-perm.c =================================================================== --- test/CodeGen/aarch64-neon-perm.c +++ test/CodeGen/aarch64-neon-perm.c @@ -888,18 +888,14 @@ // CHECK-LABEL: @test_vuzp_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -912,9 +908,8 @@ // CHECK-LABEL: @test_vuzp_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -923,9 +918,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -938,9 +930,8 @@ // CHECK-LABEL: @test_vuzp_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -949,9 +940,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -964,18 +952,14 @@ // CHECK-LABEL: @test_vuzp_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -988,9 +972,8 @@ // CHECK-LABEL: @test_vuzp_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -999,9 +982,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1014,9 +994,8 @@ // CHECK-LABEL: @test_vuzp_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -1025,9 +1004,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1040,9 +1016,8 @@ // CHECK-LABEL: @test_vuzp_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* @@ -1051,9 +1026,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1066,18 +1038,14 @@ // CHECK-LABEL: @test_vuzp_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -1090,9 +1058,8 @@ // CHECK-LABEL: @test_vuzp_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1101,9 +1068,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -1116,18 +1080,14 @@ // CHECK-LABEL: @test_vuzpq_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -1140,9 +1100,8 @@ // CHECK-LABEL: @test_vuzpq_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1151,9 +1110,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -1166,9 +1122,8 @@ // CHECK-LABEL: @test_vuzpq_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -1177,9 +1132,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -1192,18 +1144,14 @@ // CHECK-LABEL: @test_vuzpq_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -1216,9 +1164,8 @@ // CHECK-LABEL: @test_vuzpq_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1227,9 +1174,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -1242,9 +1186,8 @@ // CHECK-LABEL: @test_vuzpq_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -1253,9 +1196,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -1268,9 +1208,8 @@ // CHECK-LABEL: @test_vuzpq_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* @@ -1279,9 +1218,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -1294,18 +1230,14 @@ // CHECK-LABEL: @test_vuzpq_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -1318,9 +1250,8 @@ // CHECK-LABEL: @test_vuzpq_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1329,9 +1260,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 @@ -1344,18 +1272,14 @@ // CHECK-LABEL: @test_vzip_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -1368,9 +1292,8 @@ // CHECK-LABEL: @test_vzip_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1379,9 +1302,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -1394,9 +1314,8 @@ // CHECK-LABEL: @test_vzip_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -1405,9 +1324,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -1420,18 +1336,14 @@ // CHECK-LABEL: @test_vzip_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -1444,9 +1356,8 @@ // CHECK-LABEL: @test_vzip_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1455,9 +1366,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1470,9 +1378,8 @@ // CHECK-LABEL: @test_vzip_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -1481,9 +1388,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1496,9 +1400,8 @@ // CHECK-LABEL: @test_vzip_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* @@ -1507,9 +1410,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1522,18 +1422,14 @@ // CHECK-LABEL: @test_vzip_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -1546,9 +1442,8 @@ // CHECK-LABEL: @test_vzip_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1557,9 +1452,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -1572,18 +1464,14 @@ // CHECK-LABEL: @test_vzipq_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -1596,9 +1484,8 @@ // CHECK-LABEL: @test_vzipq_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1607,9 +1494,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -1622,9 +1506,8 @@ // CHECK-LABEL: @test_vzipq_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -1633,9 +1516,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -1648,18 +1528,14 @@ // CHECK-LABEL: @test_vzipq_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -1672,9 +1548,8 @@ // CHECK-LABEL: @test_vzipq_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1683,9 +1558,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -1698,9 +1570,8 @@ // CHECK-LABEL: @test_vzipq_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -1709,9 +1580,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -1724,9 +1592,8 @@ // CHECK-LABEL: @test_vzipq_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* @@ -1735,9 +1602,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -1750,18 +1614,14 @@ // CHECK-LABEL: @test_vzipq_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -1774,9 +1634,8 @@ // CHECK-LABEL: @test_vzipq_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -1785,9 +1644,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 @@ -1800,18 +1656,14 @@ // CHECK-LABEL: @test_vtrn_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0 @@ -1824,9 +1676,8 @@ // CHECK-LABEL: @test_vtrn_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1835,9 +1686,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0 @@ -1850,9 +1698,8 @@ // CHECK-LABEL: @test_vtrn_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -1861,9 +1708,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0 @@ -1876,18 +1720,14 @@ // CHECK-LABEL: @test_vtrn_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0 @@ -1900,9 +1740,8 @@ // CHECK-LABEL: @test_vtrn_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -1911,9 +1750,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0 @@ -1926,9 +1762,8 @@ // CHECK-LABEL: @test_vtrn_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* @@ -1937,9 +1772,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0 @@ -1952,9 +1784,8 @@ // CHECK-LABEL: @test_vtrn_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* @@ -1963,9 +1794,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0 @@ -1978,18 +1806,14 @@ // CHECK-LABEL: @test_vtrn_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0 @@ -2002,9 +1826,8 @@ // CHECK-LABEL: @test_vtrn_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* @@ -2013,9 +1836,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0 @@ -2028,18 +1848,14 @@ // CHECK-LABEL: @test_vtrnq_s8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0 @@ -2052,9 +1868,8 @@ // CHECK-LABEL: @test_vtrnq_s16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -2063,9 +1878,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0 @@ -2078,9 +1890,8 @@ // CHECK-LABEL: @test_vtrnq_s32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -2089,9 +1900,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0 @@ -2104,18 +1912,14 @@ // CHECK-LABEL: @test_vtrnq_u8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0 @@ -2128,9 +1932,8 @@ // CHECK-LABEL: @test_vtrnq_u16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -2139,9 +1942,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0 @@ -2154,9 +1954,8 @@ // CHECK-LABEL: @test_vtrnq_u32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* @@ -2165,9 +1964,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0 @@ -2180,9 +1976,8 @@ // CHECK-LABEL: @test_vtrnq_f32( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* @@ -2191,9 +1986,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0 @@ -2206,18 +1998,14 @@ // CHECK-LABEL: @test_vtrnq_p8( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i64 32, i1 false) #2 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0 @@ -2230,9 +2018,8 @@ // CHECK-LABEL: @test_vtrnq_p16( // CHECK: [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* @@ -2241,9 +2028,6 @@ // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) #2 // CHECK: [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0 Index: test/CodeGen/aarch64-v8.2a-neon-intrinsics.c =================================================================== --- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -1348,16 +1348,13 @@ // CHECK-LABEL: test_vzip_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { return vzip_f16(a, b); } @@ -1365,16 +1362,13 @@ // CHECK-LABEL: test_vzipq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { return vzipq_f16(a, b); } @@ -1382,16 +1376,13 @@ // CHECK-LABEL: test_vuzp_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { return vuzp_f16(a, b); } @@ -1399,16 +1390,13 @@ // CHECK-LABEL: test_vuzpq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { return vuzpq_f16(a, b); } @@ -1416,16 +1404,13 @@ // CHECK-LABEL: test_vtrn_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP0_I]], <4 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, <4 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> // CHECK: store <4 x half> [[VZIP1_I]], <4 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 16, i1 false) float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { return vtrn_f16(a, b); } @@ -1433,16 +1418,13 @@ // CHECK-LABEL: test_vtrnq_f16 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET_I:%.*]] = alloca %struct.float16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* +// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x half>* // CHECK: [[VZIP0_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP0_I]], <8 x half>* [[TMP1]] // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, <8 x half>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> // CHECK: store <8 x half> [[VZIP1_I]], <8 x half>* [[TMP2]] -// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i64 32, i1 false) float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { return vtrnq_f16(a, b); } Index: test/CodeGen/aggregate-assign-call.c =================================================================== --- test/CodeGen/aggregate-assign-call.c +++ test/CodeGen/aggregate-assign-call.c @@ -18,8 +18,6 @@ // O0-NOT: @llvm.lifetime.end struct S r; - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP:[^)]+]]) - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP1:[^)]+]]) // O1: call void @foo r = foo(); @@ -35,7 +33,6 @@ r = foo(); // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]]) - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP]]) return r; } @@ -51,11 +48,8 @@ // O0-NOT: @llvm.lifetime.end struct S r; - // O1: %[[RESULT_ALLOCA:[^ ]+]] = alloca %struct.S // O1: %[[TMP1_ALLOCA:[^ ]+]] = alloca %struct.S // O1: %[[TMP2_ALLOCA:[^ ]+]] = alloca %struct.S - // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8* - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: br label %[[DO_BODY:.+]] do { @@ -94,8 +88,6 @@ } while (1); // O1: [[DO_END]]: - // O1: call void @llvm.memcpy - // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8* - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) + // O1-NEXT: ret void return r; } Index: test/CodeGen/arm-swiftcall.c =================================================================== --- test/CodeGen/arm-swiftcall.c +++ test/CodeGen/arm-swiftcall.c @@ -103,9 +103,7 @@ TEST(struct_1); // CHECK-LABEL: define {{.*}} @return_struct_1() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[REC]], align 4 // CHECK: @llvm.memset -// CHECK: @llvm.memcpy // CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i16, \[2 x i8\], float, float }]]* // CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 @@ -170,8 +168,6 @@ TEST(struct_2); // CHECK-LABEL: define {{.*}} @return_struct_2() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[REC]], align 4 -// CHECK: @llvm.memcpy // CHECK: @llvm.memcpy // CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i32, float, float }]]* // CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 @@ -240,9 +236,7 @@ TEST(struct_misaligned_1) // CHECK-LABEL: define {{.*}} @return_struct_misaligned_1() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align -// CHECK: [[VAR:%.*]] = alloca [[REC]], align // CHECK: @llvm.memset -// CHECK: @llvm.memcpy // CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i8 }]]* // CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align @@ -282,8 +276,6 @@ TEST(union_het_fp) // CHECK-LABEL: define {{.*}} @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align {{(4|8)}} -// CHECK: [[VAR:%.*]] = alloca [[REC]], align {{(4|8)}} -// CHECK: @llvm.memcpy // CHECK: @llvm.memcpy // CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i32 }]]* // CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 @@ -414,7 +406,6 @@ TEST(int8) // CHECK-LABEL: define {{.*}} @return_int8() // CHECK: [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 32 -// CHECK: [[VAR:%.*]] = alloca [[REC]], align // CHECK: store // CHECK: load // CHECK: store @@ -458,7 +449,6 @@ TEST(int5) // CHECK-LABEL: define {{.*}} @return_int5() // CHECK: [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 32 -// CHECK: [[VAR:%.*]] = alloca [[REC]], align // CHECK: store // CHECK: load // CHECK: store Index: test/CodeGen/arm_neon_intrinsics.c =================================================================== --- test/CodeGen/arm_neon_intrinsics.c +++ test/CodeGen/arm_neon_intrinsics.c @@ -20463,331 +20463,259 @@ return vtbx4_p8(a, b, c); } -// CHECK-LABEL: @test_vtrn_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !3 +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !alias.scope !3 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !3 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !alias.scope !3 // CHECK: ret void int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { return vtrn_s8(a, b); } -// CHECK-LABEL: @test_vtrn_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !6 +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !alias.scope !6 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !6 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !alias.scope !6 // CHECK: ret void int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { return vtrn_s16(a, b); } -// CHECK-LABEL: @test_vtrn_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !noalias !9 +// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !alias.scope !9 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !9 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !alias.scope !9 // CHECK: ret void int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { return vtrn_s32(a, b); } -// CHECK-LABEL: @test_vtrn_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !12 +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !alias.scope !12 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !12 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !alias.scope !12 // CHECK: ret void uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { return vtrn_u8(a, b); } -// CHECK-LABEL: @test_vtrn_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !15 +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !alias.scope !15 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !15 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !alias.scope !15 // CHECK: ret void uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { return vtrn_u16(a, b); } -// CHECK-LABEL: @test_vtrn_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !noalias !18 +// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !alias.scope !18 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !18 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !alias.scope !18 // CHECK: ret void uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { return vtrn_u32(a, b); } -// CHECK-LABEL: @test_vtrn_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], !noalias !21 +// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], !alias.scope !21 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !noalias !21 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !alias.scope !21 // CHECK: ret void float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { return vtrn_f32(a, b); } -// CHECK-LABEL: @test_vtrn_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !24 +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !alias.scope !24 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !24 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !alias.scope !24 // CHECK: ret void poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { return vtrn_p8(a, b); } -// CHECK-LABEL: @test_vtrn_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrn_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !27 +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !alias.scope !27 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !27 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !alias.scope !27 // CHECK: ret void poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { return vtrn_p16(a, b); } -// CHECK-LABEL: @test_vtrnq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !30 +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !alias.scope !30 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !alias.scope !30 // CHECK: ret void int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { return vtrnq_s8(a, b); } -// CHECK-LABEL: @test_vtrnq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !33 +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !alias.scope !33 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !alias.scope !33 // CHECK: ret void int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); } -// CHECK-LABEL: @test_vtrnq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !noalias !36 +// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !alias.scope !36 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !alias.scope !36 // CHECK: ret void int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); } -// CHECK-LABEL: @test_vtrnq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !39 +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !alias.scope !39 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !alias.scope !39 // CHECK: ret void uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { return vtrnq_u8(a, b); } -// CHECK-LABEL: @test_vtrnq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !42 +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !alias.scope !42 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !alias.scope !42 // CHECK: ret void uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); } -// CHECK-LABEL: @test_vtrnq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !noalias !45 +// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !alias.scope !45 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !alias.scope !45 // CHECK: ret void uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); } -// CHECK-LABEL: @test_vtrnq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], !noalias !48 +// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], !alias.scope !48 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !alias.scope !48 // CHECK: ret void float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); } -// CHECK-LABEL: @test_vtrnq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !51 +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !alias.scope !51 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !alias.scope !51 // CHECK: ret void poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); } -// CHECK-LABEL: @test_vtrnq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vtrnq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !54 +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !alias.scope !54 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !alias.scope !54 // CHECK: ret void poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); @@ -20957,661 +20885,517 @@ return vtstq_p16(a, b); } -// CHECK-LABEL: @test_vuzp_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !57 +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !alias.scope !57 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !57 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !alias.scope !57 // CHECK: ret void int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { return vuzp_s8(a, b); } -// CHECK-LABEL: @test_vuzp_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !60 +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !alias.scope !60 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !60 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !alias.scope !60 // CHECK: ret void int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); } -// CHECK-LABEL: @test_vuzp_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !noalias !63 +// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !alias.scope !63 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !63 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !alias.scope !63 // CHECK: ret void int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); } -// CHECK-LABEL: @test_vuzp_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !66 +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !alias.scope !66 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !66 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !alias.scope !66 // CHECK: ret void uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); } -// CHECK-LABEL: @test_vuzp_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !69 +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !alias.scope !69 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !69 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !alias.scope !69 // CHECK: ret void uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); } -// CHECK-LABEL: @test_vuzp_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !noalias !72 +// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !alias.scope !72 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !72 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !alias.scope !72 // CHECK: ret void uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); } -// CHECK-LABEL: @test_vuzp_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* // CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], !noalias !75 +// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], !alias.scope !75 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !noalias !75 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !alias.scope !75 // CHECK: ret void float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); } -// CHECK-LABEL: @test_vuzp_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !78 +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !alias.scope !78 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !78 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !alias.scope !78 // CHECK: ret void poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { return vuzp_p8(a, b); } -// CHECK-LABEL: @test_vuzp_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzp_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !81 +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !alias.scope !81 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !81 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !alias.scope !81 // CHECK: ret void poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); } -// CHECK-LABEL: @test_vuzpq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !84 +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !alias.scope !84 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !alias.scope !84 // CHECK: ret void int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); } -// CHECK-LABEL: @test_vuzpq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !87 +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !alias.scope !87 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !alias.scope !87 // CHECK: ret void int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); } -// CHECK-LABEL: @test_vuzpq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !noalias !90 +// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !alias.scope !90 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !alias.scope !90 // CHECK: ret void int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); } -// CHECK-LABEL: @test_vuzpq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !93 +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !alias.scope !93 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !alias.scope !93 // CHECK: ret void uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); } -// CHECK-LABEL: @test_vuzpq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !96 +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !alias.scope !96 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !alias.scope !96 // CHECK: ret void uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); } -// CHECK-LABEL: @test_vuzpq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !noalias !99 +// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !alias.scope !99 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !alias.scope !99 // CHECK: ret void uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); } -// CHECK-LABEL: @test_vuzpq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], !noalias !102 +// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], !alias.scope !102 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !alias.scope !102 // CHECK: ret void float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); } -// CHECK-LABEL: @test_vuzpq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !105 +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !alias.scope !105 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !alias.scope !105 // CHECK: ret void poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); } -// CHECK-LABEL: @test_vuzpq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vuzpq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !108 +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !alias.scope !108 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !alias.scope !108 // CHECK: ret void poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); } -// CHECK-LABEL: @test_vzip_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !111 +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !alias.scope !111 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !111 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !alias.scope !111 // CHECK: ret void int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { return vzip_s8(a, b); } -// CHECK-LABEL: @test_vzip_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !114 +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !alias.scope !114 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !114 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !alias.scope !114 // CHECK: ret void int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { return vzip_s16(a, b); } -// CHECK-LABEL: @test_vzip_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !noalias !117 +// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !alias.scope !117 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !117 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !alias.scope !117 // CHECK: ret void int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { return vzip_s32(a, b); } -// CHECK-LABEL: @test_vzip_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !120 +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !alias.scope !120 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !120 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !alias.scope !120 // CHECK: ret void uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { return vzip_u8(a, b); } -// CHECK-LABEL: @test_vzip_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !123 +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !alias.scope !123 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !123 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !alias.scope !123 // CHECK: ret void uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { return vzip_u16(a, b); } -// CHECK-LABEL: @test_vzip_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !noalias !126 +// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !alias.scope !126 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> -// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !126 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !alias.scope !126 // CHECK: ret void uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { return vzip_u32(a, b); } -// CHECK-LABEL: @test_vzip_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], !noalias !129 +// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], !alias.scope !129 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> -// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !noalias !129 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !alias.scope !129 // CHECK: ret void float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { return vzip_f32(a, b); } -// CHECK-LABEL: @test_vzip_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !132 +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !alias.scope !132 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !132 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i32 16, i1 false) +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !alias.scope !132 // CHECK: ret void poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { return vzip_p8(a, b); } -// CHECK-LABEL: @test_vzip_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzip_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !135 +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !alias.scope !135 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !135 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i32 16, i1 false) +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !alias.scope !135 // CHECK: ret void poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { return vzip_p16(a, b); } -// CHECK-LABEL: @test_vzipq_s8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !138 +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !alias.scope !138 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138 -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !alias.scope !138 // CHECK: ret void int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { return vzipq_s8(a, b); } -// CHECK-LABEL: @test_vzipq_s16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !141 +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !alias.scope !141 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !alias.scope !141 // CHECK: ret void int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); } -// CHECK-LABEL: @test_vzipq_s32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !noalias !144 +// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !alias.scope !144 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144 -// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !alias.scope !144 // CHECK: ret void int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); } -// CHECK-LABEL: @test_vzipq_u8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !147 +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !alias.scope !147 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147 -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !alias.scope !147 // CHECK: ret void uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { return vzipq_u8(a, b); } -// CHECK-LABEL: @test_vzipq_u16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !150 +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !alias.scope !150 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !alias.scope !150 // CHECK: ret void uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); } -// CHECK-LABEL: @test_vzipq_u32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !noalias !153 +// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !alias.scope !153 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153 -// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !alias.scope !153 // CHECK: ret void uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); } -// CHECK-LABEL: @test_vzipq_f32( -// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], !noalias !156 +// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], !alias.scope !156 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156 -// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !alias.scope !156 // CHECK: ret void float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); } -// CHECK-LABEL: @test_vzipq_p8( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !159 +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !alias.scope !159 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159 -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP3]], i8* align 16 [[TMP4]], i32 32, i1 false) +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !alias.scope !159 // CHECK: ret void poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { return vzipq_p8(a, b); } -// CHECK-LABEL: @test_vzipq_p16( -// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 -// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: @test_vzipq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]], +// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !162 +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !alias.scope !162 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162 -// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP5]], i8* align 16 [[TMP6]], i32 32, i1 false) +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !alias.scope !162 // CHECK: ret void poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); Index: test/CodeGen/atomic-ops.c =================================================================== --- test/CodeGen/atomic-ops.c +++ test/CodeGen/atomic-ops.c @@ -186,8 +186,8 @@ struct S fd1(struct S *a) { // CHECK-LABEL: @fd1 // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4 - // CHECK: [[RET:%.*]] = alloca %struct.S, align 4 - // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64* + // CHECK: bitcast %struct.S* {{.*}} to i64* + // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RETVAL]] to i64* // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8( // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4 struct S ret; Index: test/CodeGen/ppc64-align-struct.c =================================================================== --- test/CodeGen/ppc64-align-struct.c +++ test/CodeGen/ppc64-align-struct.c @@ -48,13 +48,12 @@ { } -// CHECK-LABEL: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test1, align 4 +// CHECK: define void @test1va(%struct.test1* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test1* -// CHECK: [[DEST:%.*]] = bitcast %struct.test1* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test1* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test1* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 8, i1 false) struct test1 test1va (int x, ...) @@ -67,8 +66,7 @@ return y; } -// CHECK-LABEL: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test2, align 16 +// CHECK: define void @test2va(%struct.test2* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15 @@ -77,7 +75,7 @@ // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test2* -// CHECK: [[DEST:%.*]] = bitcast %struct.test2* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test2* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test2* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false) struct test2 test2va (int x, ...) @@ -90,8 +88,7 @@ return y; } -// CHECK-LABEL: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test3, align 32 +// CHECK: define void @test3va(%struct.test3* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15 @@ -100,7 +97,7 @@ // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 32 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test3* -// CHECK: [[DEST:%.*]] = bitcast %struct.test3* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test3* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test3* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 [[DEST]], i8* align 16 [[SRC]], i64 32, i1 false) struct test3 test3va (int x, ...) @@ -113,13 +110,12 @@ return y; } -// CHECK-LABEL: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test4, align 4 +// CHECK: define void @test4va(%struct.test4* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test4* -// CHECK: [[DEST:%.*]] = bitcast %struct.test4* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test4* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test4* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST]], i8* align 8 [[SRC]], i64 12, i1 false) struct test4 test4va (int x, ...) @@ -132,13 +128,12 @@ return y; } -// CHECK-LABEL: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test_longdouble, align 16 +// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test_longdouble* -// CHECK: [[DEST:%.*]] = bitcast %struct.test_longdouble* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test_longdouble* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test_longdouble* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 8 [[SRC]], i64 16, i1 false) struct test_longdouble { long double x; }; @@ -152,8 +147,7 @@ return y; } -// CHECK-LABEL: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...) -// CHECK: %y = alloca %struct.test_vector, align 16 +// CHECK: define void @testva_vector(%struct.test_vector* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...) // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15 @@ -162,7 +156,7 @@ // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16 // CHECK: store i8* %[[NEXT]], i8** %ap // CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test_vector* -// CHECK: [[DEST:%.*]] = bitcast %struct.test_vector* %y to i8* +// CHECK: [[DEST:%.*]] = bitcast %struct.test_vector* %[[AGG_RESULT]] to i8* // CHECK: [[SRC:%.*]] = bitcast %struct.test_vector* [[T0]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DEST]], i8* align 16 [[SRC]], i64 16, i1 false) struct test_vector { vector int x; }; Index: test/CodeGen/wasm-varargs.c =================================================================== --- test/CodeGen/wasm-varargs.c +++ test/CodeGen/wasm-varargs.c @@ -80,10 +80,9 @@ return v; } -// CHECK: define void @test_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret %agg.result, i8*{{.*}} %fmt, ...) {{.*}} { +// CHECK: define void @test_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret [[AGG_RESULT:%.*]], i8*{{.*}} %fmt, ...) {{.*}} { // CHECK: [[FMT_ADDR:%[^,=]+]] = alloca i8*, align 4 // CHECK: [[VA:%[^,=]+]] = alloca i8*, align 4 -// CHECK: [[V:%[^,=]+]] = alloca [[STRUCT_S]], align 4 // CHECK: store i8* %fmt, i8** [[FMT_ADDR]], align 4 // CHECK: [[VA1:%[^,=]+]] = bitcast i8** [[VA]] to i8* // CHECK: call void @llvm.va_start(i8* [[VA1]]) @@ -91,13 +90,10 @@ // CHECK: [[ARGP_NEXT:%[^,=]+]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 12 // CHECK: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4 // CHECK: [[R3:%[^,=]+]] = bitcast i8* [[ARGP_CUR]] to [[STRUCT_S]]* -// CHECK: [[R4:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8* +// CHECK: [[R4:%[^,=]+]] = bitcast [[STRUCT_S]]* [[AGG_RESULT]] to i8* // CHECK: [[R5:%[^,=]+]] = bitcast [[STRUCT_S]]* [[R3]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R4]], i8* align 4 [[R5]], i32 12, i1 false) // CHECK: [[VA2:%[^,=]+]] = bitcast i8** [[VA]] to i8* // CHECK: call void @llvm.va_end(i8* [[VA2]]) -// CHECK: [[R6:%[^,=]+]] = bitcast [[STRUCT_S]]* %agg.result to i8* -// CHECK: [[R7:%[^,=]+]] = bitcast [[STRUCT_S]]* [[V]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[R6]], i8* align 4 [[R7]], i32 12, i1 false) // CHECK: ret void // CHECK: } Index: test/CodeGen/windows-swiftcall.c =================================================================== --- test/CodeGen/windows-swiftcall.c +++ test/CodeGen/windows-swiftcall.c @@ -99,9 +99,7 @@ TEST(struct_1); // CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_1() {{.*}}{ // CHECK: [[RET:%.*]] = alloca [[STRUCT1:%.*]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[STRUCT1]], align 4 // CHECK: call void @llvm.memset -// CHECK: call void @llvm.memcpy // CHECK: [[CAST:%.*]] = bitcast [[STRUCT1]]* %retval to { i64, i64 }* // CHECK: [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, i64* [[GEP0]], align 4 @@ -149,12 +147,8 @@ TEST(struct_2); // CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_2() {{.*}}{ // CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: [[VAR:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[VAR]] +// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[RET]] // CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[CASTVAR]], {{.*}}[[STRUCT2_RESULT]] -// CHECK: [[CASTRET:%.*]] = bitcast {{.*}} [[RET]] -// CHECK: [[CASTVAR:%.*]] = bitcast {{.*}} [[VAR]] -// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[CASTRET]], {{.*}}[[CASTVAR]] // CHECK: [[CAST:%.*]] = bitcast [[STRUCT2_TYPE]]* [[RET]] to { i64, i64 }* // CHECK: [[GEP0:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, i64* [[GEP0]], align 4 @@ -205,12 +199,8 @@ TEST(struct_misaligned_1) // CHECK-LABEL: define dso_local swiftcc i64 @return_struct_misaligned_1() // CHECK: [[RET:%.*]] = alloca [[STRUCT:%.*]], align 1 -// CHECK: [[RES:%.*]] = alloca [[STRUCT]], align 1 -// CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* +// CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* // CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[CAST]], i8 0, i64 5 -// CHECK: [[CASTRET:%.*]] = bitcast [[STRUCT]]* [[RET]] to i8* -// CHECK: [[CASTRES:%.*]] = bitcast [[STRUCT]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]], i64 5 // CHECK: [[CAST:%.*]] = bitcast [[STRUCT]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 1 @@ -258,12 +248,8 @@ TEST(union_het_fp) // CHECK-LABEL: define dso_local swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: [[RES:%.*]] = alloca [[UNION]], align 8 -// CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RES]] to i8* +// CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to i8* // CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CAST]] -// CHECK: [[CASTRET:%.*]] = bitcast [[UNION]]* [[RET]] to i8* -// CHECK: [[CASTRES:%.*]] = bitcast [[UNION]]* [[RES]] to i8* -// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} [[CASTRET]], i8* align {{[0-9]+}} [[CASTRES]] // CHECK: [[CAST:%.*]] = bitcast [[UNION]]* [[RET]] to { i64 }* // CHECK: [[GEP:%.*]] = getelementptr inbounds { i64 }, { i64 }* [[CAST]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, i64* [[GEP]], align 8 Index: test/CodeGenObjC/objc-non-trivial-struct-nrvo.m =================================================================== --- test/CodeGenObjC/objc-non-trivial-struct-nrvo.m +++ test/CodeGenObjC/objc-non-trivial-struct-nrvo.m @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -fobjc-arc -fobjc-runtime-has-weak -o - %s | FileCheck %s + +// CHECK: %[[STRUCT_TRIVIAL:.*]] = type { i32 } +// CHECK: %[[STRUCT_TRIVIALBIG:.*]] = type { [64 x i32] } +// CHECK: %[[STRUCT_STRONG:.*]] = type { i8* } +// CHECK: %[[STRUCT_WEAK:.*]] = type { i8* } + +typedef struct { + int x; +} Trivial; + +typedef struct { + int x[64]; +} TrivialBig; + +typedef struct { + id x; +} Strong; + +typedef struct { + __weak id x; +} Weak; + +// CHECK: define i32 @testTrivial() +// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_TRIVIAL]], align 4 +// CHECK-NEXT: call void @func0(%[[STRUCT_TRIVIAL]]* %[[RETVAL]]) +// CHECK-NOT: memcpy +// CHECK: ret i32 % + +void func0(Trivial *); + +Trivial testTrivial(void) { + Trivial a; + func0(&a); + return a; +} + +void func1(TrivialBig *); + +// CHECK: define void @testTrivialBig(%[[STRUCT_TRIVIALBIG]]* noalias sret %[[AGG_RESULT:.*]]) +// CHECK-NOT: alloca +// CHECK: call void @func1(%[[STRUCT_TRIVIALBIG]]* %[[AGG_RESULT]]) +// CHECK-NEXT: ret void + +TrivialBig testTrivialBig(void) { + TrivialBig a; + func1(&a); + return a; +} + +// CHECK: define i8* @testStrong() +// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_STRONG]], align 8 +// CHECK: %[[NRVO:.*]] = alloca i1, align 1 +// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_STRONG]]* %[[RETVAL]] to i8** +// CHECK: call void @__default_constructor_8_s0(i8** %[[V0]]) +// CHECK: store i1 true, i1* %[[NRVO]], align 1 +// CHECK: %[[NRVO_VAL:.*]] = load i1, i1* %[[NRVO]], align 1 +// CHECK: br i1 %[[NRVO_VAL]], + +// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_STRONG]]* %[[RETVAL]] to i8** +// CHECK: call void @__destructor_8_s0(i8** %[[V1]]) +// CHECK: br + +// CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_STRONG]], %[[STRUCT_STRONG]]* %[[RETVAL]], i32 0, i32 0 +// CHECK: %[[V2:.*]] = load i8*, i8** %[[COERCE_DIVE]], align 8 +// CHECK: ret i8* %[[V2]] + +Strong testStrong(void) { + Strong a; + return a; +} + +// CHECK: define void @testWeak(%[[STRUCT_WEAK]]* noalias sret %[[AGG_RESULT:.*]]) +// CHECK: %[[NRVO:.*]] = alloca i1, align 1 +// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8** +// CHECK: call void @__default_constructor_8_w0(i8** %[[V0]]) +// CHECK: store i1 true, i1* %[[NRVO]], align 1 +// CHECK: %[[NRVO_VAL:.*]] = load i1, i1* %[[NRVO]], align 1 +// CHECK: br i1 %[[NRVO_VAL]], + +// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8** +// CHECK: call void @__destructor_8_w0(i8** %[[V1]]) +// CHECK: br + +// CHECK-NOT: call +// CHECK: ret void + +Weak testWeak(void) { + Weak a; + return a; +} + +// CHECK: define void @testWeak2( +// CHECK: call void @__default_constructor_8_w0( +// CHECK: call void @__default_constructor_8_w0( +// CHECK: call void @__copy_constructor_8_8_w0( +// CHECK: call void @__copy_constructor_8_8_w0( +// CHECK: call void @__destructor_8_w0( +// CHECK: call void @__destructor_8_w0( + +Weak testWeak2(int c) { + Weak a, b; + if (c) + return a; + else + return b; +} + +// CHECK: define internal void @"\01-[C1 foo1]"(%[[STRUCT_WEAK]]* noalias sret %[[AGG_RESULT:.*]], %{{.*}}* %{{.*}}, i8* %{{.*}}) +// CHECK: %[[NRVO:.*]] = alloca i1, align 1 +// CHECK: %[[V0:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8** +// CHECK: call void @__default_constructor_8_w0(i8** %[[V0]]) +// CHECK: store i1 true, i1* %[[NRVO]], align 1 +// CHECK: %[[NRVO_VAL:.*]] = load i1, i1* %[[NRVO]], align 1 +// CHECK: br i1 %[[NRVO_VAL]], + +// CHECK: %[[V1:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8** +// CHECK: call void @__destructor_8_w0(i8** %[[V1]]) +// CHECK: br + +// CHECK-NOT: call +// CHECK: ret void + +__attribute__((objc_root_class)) +@interface C1 +- (Weak)foo1; +@end + +@implementation C1 +- (Weak)foo1 { + Weak a; + return a; +} +@end