diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -255,22 +255,20 @@
 // CHECK-256-NEXT:  entry:
 // CHECK-256-NEXT:    [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-256-NEXT:    store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i32*
-// CHECK-256-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-256-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-256-NEXT:    [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32*
-// CHECK-256-NEXT:    store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <4 x i8>*
+// CHECK-256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-256-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-256-NEXT:    store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
 // CHECK-256-NEXT:    ret void
 //
 // CHECK-512-LABEL: @write_bool(
 // CHECK-512-NEXT:  entry:
 // CHECK-512-NEXT:    [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-512-NEXT:    store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i64*
-// CHECK-512-NEXT:    [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-512-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-512-NEXT:    [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64*
-// CHECK-512-NEXT:    store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-512-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
 // CHECK-512-NEXT:    ret void
 //
 void write_bool(struct struct_bool *s, svbool_t x) {
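A note on the layout these checks encode: an SVE predicate holds one bit per byte of the data vector, so under -msve-vector-bits=256 a fixed-length svbool_t occupies 4 bytes (lowered as <4 x i8>) and under 512 bits it occupies 8 (<8 x i8>). With the integer canonicalization removed from InstCombine (see the InstCombineLoadStoreAlloca.cpp hunk below), the predicate copy into the struct member now stays in that vector form instead of being narrowed to an i32/i64 round trip. A rough sketch of the declarations the test presumably builds on — the exact member names are an assumption, not copied from the test file:

  #include <arm_sve.h>
  #define N __ARM_FEATURE_SVE_BITS
  typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
  struct struct_bool {
    fixed_bool_t x;    /* member 0 */
    fixed_bool_t y[3]; /* member 1; the GEP above indexes y[0] */
  };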
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -169,28 +169,24 @@
 // CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
 // CHECK-NEXT:    store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT:    [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP2]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64*
-// CHECK-NEXT:    [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT:    store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64*
-// CHECK-NEXT:    store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP8]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP9]])
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP10]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT:    [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP13:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT:    store i64 [[TMP12]], i64* [[TMP13]], align 16
-// CHECK-NEXT:    [[TMP14:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP14]]
+// CHECK-NEXT:    [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 16
+// CHECK-NEXT:    [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, [[TBAA6]]
+// CHECK-NEXT:    store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT:    store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP2]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP4]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]])
+// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT:    store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT:    [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP8]]
 //
 fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
   return svsel(pg, op1, op2);
@@ -260,20 +256,18 @@
 // CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
 // CHECK-NEXT:    store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT:    [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT:    store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT:    store i64 [[TMP7]], i64* [[TMP8]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP9]]
+// CHECK-NEXT:    [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT:    store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT:    store <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT:    store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP5]]
 //
 fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
   return svsel(pg, op1, op2);
@@ -325,12 +319,12 @@
 // CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CHECK-NEXT:    store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT:    store i64 [[TMP2]], i64* [[TMP3]], align 16
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
 fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) {
   return svsel(pg, op1, op2);
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -81,13 +81,11 @@
 // CHECK-NEXT:    [[TYPE_ADDR:%.*]] = alloca <8 x i8>, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to <vscale x 16 x i1>*
 // CHECK-NEXT:    store <vscale x 16 x i1> [[TYPE_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64*
-// CHECK-NEXT:    [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT:    store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT:    [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT:    [[TYPE1:%.*]] = load <8 x i8>, <8 x i8>* [[TYPE]], align 16, [[TBAA6]]
+// CHECK-NEXT:    store <8 x i8> [[TYPE1]], <8 x i8>* [[TYPE_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
 svbool_t to_svbool_t(fixed_bool_t type) {
   return type;
@@ -98,12 +96,12 @@
 // CHECK-NEXT:    [[TYPE_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT:    store <vscale x 16 x i1> [[TYPE:%.*]], <vscale x 16 x i1>* [[TYPE_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT:    store i64 [[TMP1]], i64* [[TMP2]], align 16
-// CHECK-NEXT:    [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to <8 x i8>*
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-NEXT:    [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
 fixed_bool_t from_svbool_t(svbool_t type) {
   return type;
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -72,9 +72,9 @@
 // CHECK-512-NEXT:  entry:
 // CHECK-512-NEXT:    [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-512-NEXT:    store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to i64*
-// CHECK-512-NEXT:    [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA10]]
-// CHECK-512-NEXT:    store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA10]]
+// CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA10]]
+// CHECK-512-NEXT:    store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA10]]
 // CHECK-512-NEXT:    ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -554,42 +554,8 @@
   if (LI.getPointerOperand()->isSwiftError())
     return nullptr;

-  Type *Ty = LI.getType();
   const DataLayout &DL = IC.getDataLayout();

-  // Try to canonicalize loads which are only ever stored to operate over
-  // integers instead of any other type. We only do this when the loaded type
-  // is sized and has a size exactly the same as its store size and the store
-  // size is a legal integer type.
-  // Do not perform canonicalization if minmax pattern is found (to avoid
-  // infinite loop).
-  Type *Dummy;
-  if (!Ty->isIntegerTy() && Ty->isSized() && !isa<ScalableVectorType>(Ty) &&
-      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
-      DL.typeSizeEqualsStoreSize(Ty) && !DL.isNonIntegralPointerType(Ty) &&
-      !isMinMaxWithLoads(InstCombiner::peekThroughBitcast(
-                             LI.getPointerOperand(), /*OneUseOnly=*/true),
-                         Dummy)) {
-    if (all_of(LI.users(), [&LI](User *U) {
-          auto *SI = dyn_cast<StoreInst>(U);
-          return SI && SI->getPointerOperand() != &LI &&
-                 !SI->getPointerOperand()->isSwiftError();
-        })) {
-      LoadInst *NewLoad = IC.combineLoadToNewType(
-          LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
-      // Replace all the stores with stores of the newly loaded value.
-      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
-        auto *SI = cast<StoreInst>(*UI++);
-        IC.Builder.SetInsertPoint(SI);
-        combineStoreToNewValue(IC, *SI, NewLoad);
-        IC.eraseInstFromFunction(*SI);
-      }
-      assert(LI.use_empty() && "Failed to remove all users of the load!");
-      // Return the old load so the combiner can delete it safely.
-      return &LI;
-    }
-  }
-
   // Fold away bit casts of the loaded value by loading the desired type.
   // We can do this for BitCastInsts as well as casts from and to pointer types,
   // as long as those are noops (i.e., the source or dest type have the same
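The hunk above is the substance of this patch: InstCombine used to take a load whose only users are stores, rewrite it (via combineLoadToNewType) as an integer load of the same store size, and then rewrite each store to match. A minimal sketch of the old behaviour on a plain float copy — illustrative IR, not taken from any test in this patch:

  define void @copy(float* %src, float* %dst) {
    ; %v has exactly one user, the store below, so the old fold applied
    %v = load float, float* %src, align 4
    store float %v, float* %dst, align 4
    ret void
  }

  ; ...which the deleted canonicalization rewrote as an i32 round trip:
  ;   %1 = bitcast float* %src to i32*
  ;   %v1 = load i32, i32* %1, align 4
  ;   %2 = bitcast float* %dst to i32*
  ;   store i32 %v1, i32* %2, align 4

The test updates that follow are this pattern in reverse: loads and stores keep their source type (float, i8*, <4 x i8>, ...) instead of being forced through iN.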
diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll
--- a/llvm/test/Transforms/InstCombine/atomic.ll
+++ b/llvm/test/Transforms/InstCombine/atomic.ll
@@ -325,11 +325,9 @@
 define i32 @test18(float* %p) {
 ; CHECK-LABEL: @test18(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to i32*
-; CHECK-NEXT:    [[X1:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
+; CHECK-NEXT:    [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    call void @clobber()
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[P]] to i32*
-; CHECK-NEXT:    store atomic i32 [[X1]], i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT:    store atomic float [[X]], float* [[P]] unordered, align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   %x = load atomic float, float* %p unordered, align 4
@@ -376,10 +374,8 @@
 define void @pr27490a(i8** %p1, i8** %p2) {
 ; CHECK-LABEL: @pr27490a(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT:    [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT:    store volatile i64 [[L1]], i64* [[TMP2]], align 8
+; CHECK-NEXT:    [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT:    store volatile i8* [[L]], i8** [[P2:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %l = load i8*, i8** %p1
@@ -389,10 +385,8 @@
 define void @pr27490b(i8** %p1, i8** %p2) {
 ; CHECK-LABEL: @pr27490b(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT:    [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT:    store atomic i64 [[L1]], i64* [[TMP2]] seq_cst, align 8
+; CHECK-NEXT:    [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT:    store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
 ; CHECK-NEXT:    ret void
 ;
   %l = load i8*, i8** %p1
diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
--- a/llvm/test/Transforms/InstCombine/load.ll
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -205,18 +205,16 @@
 define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) {
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT:    [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X11]], i32* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X11]], i32* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
-; CHECK-NEXT:    [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
-; CHECK-NEXT:    store i32 [[X22]], i32* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X22]], i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to float*
+; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to float*
+; CHECK-NEXT:    [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to float*
+; CHECK-NEXT:    [[X1:%.*]] = load float, float* [[X_CAST]], align 4
+; CHECK-NEXT:    store float [[X1]], float* [[A_CAST]], align 4
+; CHECK-NEXT:    store float [[X1]], float* [[B_CAST]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = load float, float* [[X_CAST]], align 4
+; CHECK-NEXT:    store float [[X2]], float* [[B_CAST]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to float*
+; CHECK-NEXT:    store float [[X2]], float* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -240,18 +238,16 @@
 define void @test16-vect(i8* %x, i8* %a, i8* %b, i8* %c) {
 ; CHECK-LABEL: @test16-vect(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT:    [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X11]], i32* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X11]], i32* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
-; CHECK-NEXT:    [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
-; CHECK-NEXT:    store i32 [[X22]], i32* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[X22]], i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to <4 x i8>*
+; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to <4 x i8>*
+; CHECK-NEXT:    [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to <4 x i8>*
+; CHECK-NEXT:    [[X1:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4
+; CHECK-NEXT:    store <4 x i8> [[X1]], <4 x i8>* [[A_CAST]], align 4
+; CHECK-NEXT:    store <4 x i8> [[X1]], <4 x i8>* [[B_CAST]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4
+; CHECK-NEXT:    store <4 x i8> [[X2]], <4 x i8>* [[B_CAST]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to <4 x i8>*
+; CHECK-NEXT:    store <4 x i8> [[X2]], <4 x i8>* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -161,24 +161,11 @@
 }

 define void @test_load_cast_combine_nonnull(float** %ptr) {
-; We can't preserve nonnull metadata when converting a load of a pointer to
-; a load of an integer. Instead, we translate it to range metadata.
-; FIXME: We should also transform range metadata back into nonnull metadata.
-; FIXME: This test is very fragile. If any LABEL lines are added after
-; this point, the test will fail, because this test depends on a metadata tuple,
-; which is always emitted at the end of the file. At some point, we should
-; consider an option to the IR printer to emit MD tuples after the function
-; that first uses them--this will allow us to refer to them like this and not
-; have the tests break. For now, this function must always come last in this
-; file, and no LABEL lines are to be added after this point.
-;
 ; CHECK-LABEL: @test_load_cast_combine_nonnull(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float** [[PTR:%.*]] to i64*
-; CHECK-NEXT:    [[P1:%.*]] = load i64, i64* [[TMP0]], align 8, !range ![[MD:[0-9]+]]
+; CHECK-NEXT:    [[P:%.*]] = load float*, float** [[PTR:%.*]], align 8, !nonnull !7
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float*, float** [[PTR]], i64 42
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float** [[GEP]] to i64*
-; CHECK-NEXT:    store i64 [[P1]], i64* [[TMP1]], align 8
+; CHECK-NEXT:    store float* [[P]], float** [[GEP]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -188,8 +175,6 @@
   ret void
 }

-; This is the metadata tuple that we reference above:
-; CHECK: ![[MD]] = !{i64 1, i64 0}
 !0 = !{!1, !1, i64 0}
 !1 = !{!"scalar type", !2}
 !2 = !{!"root"}
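The comment block deleted above documented a wart that disappears along with the canonicalization: once a pointer load was rewritten as an i64 load, !nonnull had no integer equivalent and was approximated with !range metadata, which in turn forced the test to chase a metadata tuple emitted at the end of the file. Schematically — illustrative lines, not part of the test:

  ; old output:  %p1 = load i64, i64* %0, align 8, !range !{i64 1, i64 0}
  ;              (the wrapping range [1, 0) excludes only zero, an integer stand-in for "non-null")
  ; new output:  %p = load float*, float** %ptr, align 8, !nonnull !7

Since the load now keeps its pointer type, the !nonnull metadata survives unchanged, both FIXMEs become moot, and the trailing metadata CHECK can simply be dropped.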
diff --git a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll
--- a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll
+++ b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll
@@ -41,10 +41,8 @@
 ; integers, since pointers in address space 3 are integral.
 ; CHECK-LABEL: @f_3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8 addrspace(3)** [[PTR0:%.*]] to i64*
-; CHECK-NEXT:    [[VAL1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(3)** [[PTR1:%.*]] to i64*
-; CHECK-NEXT:    store i64 [[VAL1]], i64* [[TMP1]], align 8
+; CHECK-NEXT:    [[VAL:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)** [[PTR0:%.*]], align 8
+; CHECK-NEXT:    store i8 addrspace(3)* [[VAL]], i8 addrspace(3)** [[PTR1:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -79,13 +77,13 @@
 define i64 @g2(i8* addrspace(4)* %gp) {
 ; CHECK-LABEL: @g2(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* addrspace(4)* [[GP:%.*]] to i64 addrspace(4)*
-; CHECK-NEXT:    [[DOTPRE1:%.*]] = load i64, i64 addrspace(4)* [[TMP1]], align 8
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i8*, i8* addrspace(4)* [[GP:%.*]], align 8
 ; CHECK-NEXT:    [[V74:%.*]] = call i8 addrspace(4)* @alloc()
 ; CHECK-NEXT:    [[V77:%.*]] = getelementptr i8, i8 addrspace(4)* [[V74]], i64 -8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i64 addrspace(4)*
-; CHECK-NEXT:    store i64 [[DOTPRE1]], i64 addrspace(4)* [[TMP2]], align 8
-; CHECK-NEXT:    ret i64 [[DOTPRE1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i8* addrspace(4)*
+; CHECK-NEXT:    store i8* [[DOTPRE]], i8* addrspace(4)* [[TMP1]], align 8
+; CHECK-NEXT:    [[V81_CAST:%.*]] = ptrtoint i8* [[DOTPRE]] to i64
+; CHECK-NEXT:    ret i64 [[V81_CAST]]
 ;
   %.pre = load i8*, i8* addrspace(4)* %gp, align 8
   %v74 = call i8 addrspace(4)* @alloc()
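Non-integral pointers were the one case the deleted fold already refused to touch (the !DL.isNonIntegralPointerType(Ty) guard in the removed code), because round-tripping such pointers through i64 is not guaranteed to be meaning-preserving. Which address spaces are non-integral is declared in the module's data layout; this test file presumably carries a string along the lines of:

  target datalayout = "e-p:64:64:64-ni:4"  ; pointers in addrspace(4) are non-integral

After this patch the distinction no longer matters for this fold: loads of integral addrspace(3) pointers in @f_3 also keep their pointer type, and @g2 only materializes an integer, via ptrtoint, where the function actually returns i64.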
diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
--- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
@@ -50,10 +50,10 @@
 define dso_local void @_Z3gen1S(%0* noalias sret align 8 %arg, %0* byval(%0) align 8 %arg1) {
 ; CHECK-LABEL: @_Z3gen1S(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %0* [[ARG1:%.*]] to i64*
-; CHECK-NEXT:    [[I21:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    store i64 [[I21]], i64* [[TMP1]], align 8
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG1:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I2:%.*]] = load i32*, i32** [[I]], align 8
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i32* [[I2]], i32** [[I3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -68,12 +68,12 @@
 ; CHECK-LABEL: @_Z3foo1S(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
-; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD15:%.*]] = load i32*, i32** [[TMP0]], align 8
+; CHECK-NEXT:    [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8
 ; CHECK-NEXT:    [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0
-; CHECK-NEXT:    store i32* [[I1_SROA_0_0_COPYLOAD15]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
+; CHECK-NEXT:    store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
 ; CHECK-NEXT:    tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]])
-; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD15]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD]]
 ;
 bb:
   %i = alloca %0, align 8
@@ -107,21 +107,21 @@
 define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
 ; CHECK-LABEL: @_Z3bar1S(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD14:%.*]] = load i32*, i32** [[TMP0]], align 8
+; CHECK-NEXT:    [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8
 ; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @_Z4condv()
 ; CHECK-NEXT:    [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
 ; CHECK-NEXT:    br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    tail call void @_Z5sync0v()
-; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD14]])
+; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
 ; CHECK-NEXT:    br label [[BB13:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    tail call void @_Z5sync1v()
-; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD14]])
+; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
 ; CHECK-NEXT:    br label [[BB13]]
 ; CHECK:       bb13:
-; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD14]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD]]
 ;
 bb:
   %i = alloca %0, align 8