Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -420,7 +420,7 @@
 /// the CurPtr buffer.  DL is the DataLayout.
 bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                         unsigned BytesLeft, const DataLayout &DL) {
-  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
+  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()).getKnownMinValue() &&
          "Out of range access");
 
   // If this element is zero or undefined, we can just return since *CurPtr is
@@ -499,7 +499,9 @@
     // not reached.
   }
 
-  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
+  Constant *SplatVal =
+      isa<ScalableVectorType>(C->getType()) ? C->getSplatValue(false) : nullptr;
+  if (SplatVal || isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
       isa<ConstantDataSequential>(C)) {
     uint64_t NumElts;
     Type *EltTy;
@@ -507,16 +509,17 @@
       NumElts = AT->getNumElements();
       EltTy = AT->getElementType();
     } else {
-      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
-      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
+      NumElts =
+          cast<VectorType>(C->getType())->getElementCount().getKnownMinValue();
+      EltTy = cast<VectorType>(C->getType())->getElementType();
     }
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedSize();
     uint64_t Index = ByteOffset / EltSize;
     uint64_t Offset = ByteOffset - Index * EltSize;
     for (; Index != NumElts; ++Index) {
-      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
-                              BytesLeft, DL))
+      Constant *El = SplatVal ? SplatVal : C->getAggregateElement(Index);
+      if (!ReadDataFromGlobal(El, Offset, CurPtr, BytesLeft, DL))
         return false;
 
       uint64_t BytesWritten = EltSize - Offset;
@@ -589,15 +592,15 @@
   if (BytesLoaded > 32 || BytesLoaded == 0)
     return nullptr;
 
-  int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
+  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
 
   // If we're not accessing anything in this constant, the result is undefined.
   if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
     return UndefValue::get(IntType);
 
   // If we're not accessing anything in this constant, the result is undefined.
-  if (Offset >= InitializerSize)
-    return UndefValue::get(IntType);
+  if (Offset >= static_cast<int64_t>(InitializerSize.getKnownMinValue()))
+    return InitializerSize.isScalable() ? nullptr : UndefValue::get(IntType);
 
   unsigned char RawBytes[32] = {0};
   unsigned char *CurPtr = RawBytes;
Index: llvm/test/Transforms/InstCombine/load-store-forward.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -120,3 +120,170 @@
   %load = load i32, i32* %p5, align 2
   ret i32 %load
 }
+
+define i32 @load_i32_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_i32_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = load i32, i32* %a, align 4
+  ret i32 %1
+}
+
+define i64 @load_i64_store_nxv8i8(i8* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_i64_store_nxv8i8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 8 x i8>*
+; CHECK-NEXT:    store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i8> zeroinitializer), <vscale x 8 x i8>* [[TMP0]], align 16
+; CHECK-NEXT:    ret i64 72340172838076673
+;
+entry:
+  %0 = bitcast i8* %a to <vscale x 8 x i8>*
+  store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i8> zeroinitializer), <vscale x 8 x i8>* %0, align 16
+  %a2 = bitcast i8* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+define i64 @load_i64_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_i64_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    ret i64 4294967297
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %a2 = bitcast i32* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+define i8 @load_i8_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; LITTLE-LABEL: @load_i8_store_nxv4i32(
+; LITTLE-NEXT:  entry:
+; LITTLE-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; LITTLE-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; LITTLE-NEXT:    ret i8 1
+;
+; BIG-LABEL: @load_i8_store_nxv4i32(
+; BIG-NEXT:  entry:
+; BIG-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; BIG-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; BIG-NEXT:    ret i8 0
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %a2 = bitcast i32* %a to i8*
+  %load = load i8, i8* %a2, align 1
+  ret i8 %load
+}
+
+define float @load_f32_store_nxv4f32(float* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_f32_store_nxv4f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
+; CHECK-NEXT:    store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x float> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+entry:
+  %0 = bitcast float* %a to <vscale x 4 x float>*
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x float> zeroinitializer), <vscale x 4 x float>* %0, align 16
+  %1 = load float, float* %a, align 4
+  ret float %1
+}
+
+define i32 @load_i32_store_nxv4f32(float* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_i32_store_nxv4f32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
+; CHECK-NEXT:    store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x float> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
+; CHECK-NEXT:    ret i32 1065353216
+;
+entry:
+  %0 = bitcast float* %a to <vscale x 4 x float>*
+  store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x float> zeroinitializer), <vscale x 4 x float>* %0, align 16
+  %a2 = bitcast float* %a to i32*
+  %load = load i32, i32* %a2, align 4
+  ret i32 %load
+}
+
+define <4 x i32> @load_v4i32_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_v4i32_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <4 x i32>*
+  %2 = load <4 x i32>, <4 x i32>* %1, align 16
+  ret <4 x i32> %2
+}
+
+define <4 x i16> @load_v4i16_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; LITTLE-LABEL: @load_v4i16_store_nxv4i32(
+; LITTLE-NEXT:  entry:
+; LITTLE-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; LITTLE-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; LITTLE-NEXT:    ret <4 x i16> <i16 1, i16 0, i16 1, i16 0>
+;
+; BIG-LABEL: @load_v4i16_store_nxv4i32(
+; BIG-NEXT:  entry:
+; BIG-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; BIG-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; BIG-NEXT:    ret <4 x i16> <i16 0, i16 1, i16 0, i16 1>
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <4 x i16>*
+  %2 = load <4 x i16>, <4 x i16>* %1, align 16
+  ret <4 x i16> %2
+}
+
+; Loaded data type exceeds the known minimum size of the store.
+define i64 @load_i64_store_nxv4i8(i8* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_i64_store_nxv4i8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 4 x i8>*
+; CHECK-NEXT:    store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i8> zeroinitializer), <vscale x 4 x i8>* [[TMP0]], align 16
+; CHECK-NEXT:    [[A2:%.*]] = bitcast i8* [[A]] to i64*
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
+; CHECK-NEXT:    ret i64 [[LOAD]]
+;
+entry:
+  %0 = bitcast i8* %a to <vscale x 4 x i8>*
+  store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i8> zeroinitializer), <vscale x 4 x i8>* %0, align 16
+  %a2 = bitcast i8* %a to i64*
+  %load = load i64, i64* %a2, align 8
+  ret i64 %load
+}
+
+; Loaded data size is unknown - we cannot guarantee it won't
+; exceed the store size.
+define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(i32* %a) vscale_range(1,16) {
+; CHECK-LABEL: @load_nxv4i8_store_nxv4i32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to <vscale x 4 x i8>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i8> [[TMP2]]
+;
+entry:
+  %0 = bitcast i32* %a to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
+  %1 = bitcast i32* %a to <vscale x 4 x i8>*
+  %2 = load <vscale x 4 x i8>, <vscale x 4 x i8>* %1, align 16
+  ret <vscale x 4 x i8> %2
+}