This is an archive of the discontinued LLVM Phabricator instance.

[GVN] Use vector ops when doing loadCoercion on a vector value
Needs ReviewPublic

Authored by ManuelJBrito on Jun 30 2023, 8:58 AM.

Download Raw Diff

Details

Reviewers

fhahn
nikic
foad
Carrot

Summary

Currently, bitmasking is used for load coercion of vector values. This is troublesome because these operations propagate poison.
This patch uses a combination of vector operations instead.
Fixes PR63059.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

ManuelJBrito created this revision.Jun 30 2023, 8:58 AM

Herald added a project: Restricted Project. · View Herald TranscriptJun 30 2023, 8:58 AM

Herald added subscribers: StephenFan, hiraditya. · View Herald Transcript

ManuelJBrito requested review of this revision.Jun 30 2023, 8:58 AM

Herald added a project: Restricted Project. · View Herald TranscriptJun 30 2023, 8:58 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B242446: Diff 536280.Jun 30 2023, 10:21 AM

foad added inline comments.Jul 1 2023, 4:01 AM

llvm/lib/Transforms/Utils/VNCoercion.cpp
316	Do you also need to do this for the vector-of-pointers case that is handled above?

ManuelJBrito added inline comments.Jul 1 2023, 10:05 AM

llvm/lib/Transforms/Utils/VNCoercion.cpp
316	Yes, the vector-of-pointers should be converted to a vector-of-ints and then the rest of the logic would work fine for that case. The problem is that AFAICT we can't just bitcast it to a vector-of-ints of the correct size.

Handle vector-of-ptrs.

ping :)

Adding Guozhi Wei as a reviewer.

ping :)

Herald added a subscriber: sunshaoce. · View Herald TranscriptAug 17 2023, 7:42 AM

This causes an assertion failure:

define i11 @test(ptr %loc, <4 x i6> %v) {
  store <4 x i6> %v, ptr  %loc
  %ref = load i11, ptr %loc
  ret i11 %ref
}

Update: fill the mask with poison mask elements in the shufflevector.

Harbormaster completed remote builds in B253456: Diff 551462.Aug 18 2023, 5:06 AM

ping :)

It looks like your last update dropped the vector of pointer tests?

nikic added inline comments.Aug 28 2023, 8:09 AM

llvm/lib/Transforms/Utils/VNCoercion.cpp

321

Invert condition and early return?

377

This looks incorrect to me for the case where the division isn't exact, e.g. this is a miscompile:

define i16 @test(ptr %loc, <4 x i16> %v) {
; CHECK-LABEL: define i16 @test
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i16> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i16> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[LOC]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[V]], i64 0
; CHECK-NEXT:    ret i16 [[TMP1]]
;
  store <4 x i16> %v, ptr %loc
  %gep = getelementptr i8, ptr %loc, i64 1
  %ref = load i16, ptr %gep
  ret i16 %ref
}

The loaded value is the high part of element 0 and the low part of element 1, not just element 0.

ManuelJBrito added inline comments.Aug 28 2023, 9:49 AM

llvm/lib/Transforms/Utils/VNCoercion.cpp
377	Nice catch! Thank you! NumEltsRequiredFromVec needs to take into consideration the offset. I'll try to support this case without making this even more complex.

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Utils/

VNCoercion.cpp

69 lines

test/

Transforms/

GVN/

pr63059.ll

125 lines

Diff 551462

llvm/lib/Transforms/Utils/VNCoercion.cpp

#include "llvm/Transforms/Utils/VNCoercion.h"		#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/ConstantFolding.h"		#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"		#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"		#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
		#include <numeric>

#define DEBUG_TYPE "vncoerce"		#define DEBUG_TYPE "vncoerce"

namespace llvm {		namespace llvm {
namespace VNCoercion {		namespace VNCoercion {

static bool isFirstClassAggregateOrScalableType(Type *Ty) {		static bool isFirstClassAggregateOrScalableType(Type *Ty) {
return Ty->isStructTy() \|\| Ty->isArrayTy() \|\| isa<ScalableVectorType>(Ty);		return Ty->isStructTy() \|\| Ty->isArrayTy() \|\| isa<ScalableVectorType>(Ty);
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines	static Value getStoreValueForLoadHelper(Value SrcVal, unsigned Offset,
uint64_t StoreSize =		uint64_t StoreSize =
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;		(DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;		uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert		// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.		// to an integer type to start with.
if (SrcVal->getType()->isPtrOrPtrVectorTy())		if (SrcVal->getType()->isPtrOrPtrVectorTy())
SrcVal =		SrcVal =
Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));		Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
		if (LoadTy->isPtrOrPtrVectorTy())
		foadUnsubmitted Not Done Reply Inline Actions Do you also need to do this for the vector-of-pointers case that is handled above? foad: Do you also need to do this for the vector-of-pointers case that is handled above?
		ManuelJBritoAuthorUnsubmitted Done Reply Inline Actions Yes, the vector-of-pointers should be converted to a vector-of-ints and then the rest of the logic would work fine for that case. The problem is that AFAICT we can't just bitcast it to a vector-of-ints of the correct size. ManuelJBrito: Yes, the vector-of-pointers should be converted to a vector-of-ints and then the rest of the…
		LoadTy = DL.getIntPtrType(LoadTy);

		// If SrcVal is a vector, use vector ops to get the loaded value.
		if (SrcVal->getType()->isVectorTy()) {
		if (StoreSize > LoadSize) {
		nikicUnsubmitted Not Done Reply Inline Actions Invert condition and early return? nikic: Invert condition and early return?
		auto *SrcValTyV = cast<FixedVectorType>(SrcVal->getType());
		unsigned LoadSizeInBits = DL.getTypeSizeInBits(LoadTy).getFixedValue();
		unsigned SrcValEltSizeInBits = SrcValTyV->getScalarSizeInBits();

		// Compute how many elements of SrcVal are necessary. We need to ensure
		// that all bitcasts are between vector types to prevent poison
		// propagation.
		unsigned NumEltsRequiredFromVec =
		std::lcm(SrcValEltSizeInBits, LoadSizeInBits) / SrcValEltSizeInBits;
		if (isa<FixedVectorType>(LoadTy)) {
		auto *LoadTyV = cast<FixedVectorType>(LoadTy);
		unsigned LoadEltSizeInBits = LoadTy->getScalarSizeInBits();

		SmallVector<int, 16> Mask(NumEltsRequiredFromVec, PoisonMaskElem);
		std::iota(Mask.begin(), Mask.begin() + SrcValTyV->getNumElements(), 0);
		SrcVal = Builder.CreateShuffleVector(SrcVal, Mask);

		// A subvector is loaded so we extract it from SrcVal.
		if (SrcVal->getType() != LoadTy) {
		// Bitcast to ensure that SrcVal has the same element type as the
		// loaded vector.
		unsigned NumElems = (NumEltsRequiredFromVec * SrcValEltSizeInBits) /
		LoadEltSizeInBits;

		auto *DestTy = VectorType::get(LoadTy->getScalarType(), NumElems,
		/* Scalable */ false);
		SrcVal = Builder.CreateBitCast(SrcVal, DestTy);
		if (SrcVal->getType() != LoadTy) {
		// Extract the subvector such that SrcVal and the loaded vector
		// have the same size.
		SmallVector<int, 16> Mask2(LoadTyV->getNumElements());
		std::iota(Mask2.begin(), Mask2.end(), 0);
		SrcVal = Builder.CreateShuffleVector(SrcVal, Mask2);
		}
		}
		} else {
		if (SrcVal->getType()->getScalarType() != LoadTy) {
		// Requires a bitcast before the extract.
		auto *DestTy = VectorType::get(LoadTy, NumEltsRequiredFromVec,
		/* Scalable */ false);
		if (SrcValTyV->getPrimitiveSizeInBits() !=
		DestTy->getPrimitiveSizeInBits()) {
		// Extract the subvector to ensure a legal bitcast.
		SmallVector<int, 16> Mask(NumEltsRequiredFromVec, PoisonMaskElem);
		std::iota(Mask.begin(), Mask.begin() + SrcValTyV->getNumElements(),
		0);
		SrcVal = Builder.CreateShuffleVector(SrcVal, Mask);
		// Compute the new vector type from which we can extract the loaded
		// element.
		unsigned NumElems =
		(NumEltsRequiredFromVec * SrcValEltSizeInBits) / LoadSizeInBits;
		DestTy = VectorType::get(LoadTy, NumElems, /* Scalable */ false);
		}
		SrcVal = Builder.CreateBitCast(SrcVal, DestTy);
		}
		unsigned Idx = Offset / LoadSize;
		nikicUnsubmitted Not Done Reply Inline Actions This looks incorrect to me for the case where the division isn't exact, e.g. this is a miscompile: define i16 @test(ptr %loc, <4 x i16> %v) { ; CHECK-LABEL: define i16 @test ; CHECK-SAME: (ptr [[LOC:%.]], <4 x i16> [[V:%.]]) { ; CHECK-NEXT: store <4 x i16> [[V]], ptr [[LOC]], align 8 ; CHECK-NEXT: [[GEP:%.]] = getelementptr i8, ptr [[LOC]], i64 1 ; CHECK-NEXT: [[TMP1:%.]] = extractelement <4 x i16> [[V]], i64 0 ; CHECK-NEXT: ret i16 [[TMP1]] ; store <4 x i16> %v, ptr %loc %gep = getelementptr i8, ptr %loc, i64 1 %ref = load i16, ptr %gep ret i16 %ref } The loaded value is the high part of elements zero and the low part of element 1, not just element 0. nikic: This looks incorrect to me for the case where the division isn't exact, e.g. this is a…
		ManuelJBritoAuthorUnsubmitted Done Reply Inline Actions Nice catch! Thank you! NumEltsRequiredFromVec needs to take into consideration the offset. I'll try to support this case without making this even more complex. ManuelJBrito: Nice catch! Thank you! NumEltsRequiredFromVec needs to take into consideration the offset. I'll…
		SrcVal = Builder.CreateExtractElement(SrcVal, Idx);
		}
		}
		return SrcVal;
		}

if (!SrcVal->getType()->isIntegerTy())		if (!SrcVal->getType()->isIntegerTy())
SrcVal =		SrcVal =
Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));		Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));

// Shift the bits to the least significant depending on endianness.		// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;		unsigned ShiftAmt;
if (DL.isLittleEndian())		if (DL.isLittleEndian())
ShiftAmt = Offset * 8;		ShiftAmt = Offset * 8;
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines

llvm/test/Transforms/GVN/pr63059.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s \| FileCheck %s		; RUN: opt -passes=gvn -S < %s \| FileCheck %s


define <4 x float> @ConvertVectors_ByRef(ptr %loc) {		define <4 x float> @ConvertVectors_ByRef(ptr %loc) {
; CHECK-LABEL: define <4 x float> @ConvertVectors_ByRef		; CHECK-LABEL: define <4 x float> @ConvertVectors_ByRef
; CHECK-SAME: (ptr [[LOC:%.*]]) {		; CHECK-SAME: (ptr [[LOC:%.*]]) {
; CHECK-NEXT: [[LOAD_VEC:%.*]] = load <4 x float>, ptr [[LOC]], align 16		; CHECK-NEXT: [[LOAD_VEC:%.*]] = load <4 x float>, ptr [[LOC]], align 16
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[LOAD_VEC]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>		; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[LOAD_VEC]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 1		; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[LOAD_VEC]] to i128		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD_VEC]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = lshr i128 [[TMP1]], 32		; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float [[TMP1]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float [[TMP4]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2		; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP1]], 64		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[LOAD_VEC]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i32		; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[TMP2]], i64 2
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float		; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[TMP2]], i64 3
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[TMP7]], i64 2
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[TMP7]], i64 3
; CHECK-NEXT: ret <4 x float> [[INS3]]		; CHECK-NEXT: ret <4 x float> [[INS3]]
;		;
%load_vec = load <4 x float>, ptr %loc, align 16		%load_vec = load <4 x float>, ptr %loc, align 16
%shuf = shufflevector <4 x float> %load_vec, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>		%shuf = shufflevector <4 x float> %load_vec, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
%gep1 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 1		%gep1 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 1
%load1 = load float, ptr %gep1, align 4		%load1 = load float, ptr %gep1, align 4
%ins1 = insertelement <4 x float> %shuf, float %load1, i64 1		%ins1 = insertelement <4 x float> %shuf, float %load1, i64 1
%gep2 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2		%gep2 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
%load2 = load float, ptr %gep2, align 8		%load2 = load float, ptr %gep2, align 8
%ins2 = insertelement <4 x float> %ins1, float %load2, i64 2		%ins2 = insertelement <4 x float> %ins1, float %load2, i64 2
%ins3 = insertelement <4 x float> %ins2, float %load2, i64 3		%ins3 = insertelement <4 x float> %ins2, float %load2, i64 3
ret <4 x float> %ins3		ret <4 x float> %ins3
}		}

define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {		define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @store_element_smaller_than_load		; CHECK-LABEL: define i64 @store_element_smaller_than_load
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2		; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = lshr i128 [[TMP0]], 64		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64		; CHECK-NEXT: ret i64 [[TMP1]]
; CHECK-NEXT: ret i64 [[TMP2]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
%ref = load i64, ptr %gep		%ref = load i64, ptr %gep
ret i64 %ref		ret i64 %ref
}		}

Show All 17 Lines

define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {		define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load_memory_none		; CHECK-LABEL: define i64 @call_before_load_memory_none
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: call void @f_no_mem(<4 x i32> [[V]])		; CHECK-NEXT: call void @f_no_mem(<4 x i32> [[V]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2		; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = lshr i128 [[TMP0]], 64		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64		; CHECK-NEXT: ret i64 [[TMP1]]
; CHECK-NEXT: ret i64 [[TMP2]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
call void @f_no_mem(<4 x i32> %v)		call void @f_no_mem(<4 x i32> %v)
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
%ref = load i64, ptr %gep		%ref = load i64, ptr %gep
ret i64 %ref		ret i64 %ref
}		}

define i64 @call_after_load(ptr %loc, <4 x i32> %v) {		define i64 @call_after_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_after_load		; CHECK-LABEL: define i64 @call_after_load
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2		; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = lshr i128 [[TMP0]], 64		; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT: call void @f(<4 x i32> [[V]])		; CHECK-NEXT: call void @f(<4 x i32> [[V]])
; CHECK-NEXT: ret i64 [[TMP2]]		; CHECK-NEXT: ret i64 [[TMP1]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
%ref = load i64, ptr %gep		%ref = load i64, ptr %gep
call void @f(<4 x i32> %v)		call void @f(<4 x i32> %v)
ret i64 %ref		ret i64 %ref
}		}

define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {		define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {
; CHECK-LABEL: define double @store_element_smaller_than_load_float		; CHECK-LABEL: define double @store_element_smaller_than_load_float
; CHECK-SAME: (ptr [[LOC:%.]], <4 x float> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x float> [[V:%.]]) {
; CHECK-NEXT: store <4 x float> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x float> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2		; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V]] to <2 x double>
; CHECK-NEXT: [[TMP2:%.*]] = lshr i128 [[TMP1]], 64		; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i64		; CHECK-NEXT: ret double [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
; CHECK-NEXT: ret double [[TMP4]]
;		;
store <4 x float> %v, ptr %loc		store <4 x float> %v, ptr %loc
%gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2		%gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
%ref = load double, ptr %gep		%ref = load double, ptr %gep
ret double %ref		ret double %ref
}		}

define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {		define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {
Show All 9 Lines	;
ret i64 %ref		ret i64 %ref
}		}


define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {		define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i9 @load_as_scalar_larger		; CHECK-LABEL: define i9 @load_as_scalar_larger
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i6> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i6> [[V:%.]]) {
; CHECK-NEXT: store <4 x i6> [[V]], ptr [[LOC]], align 4		; CHECK-NEXT: store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i6> [[V]], <4 x i6> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = trunc i24 [[TMP1]] to i16		; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i6> [[TMP1]] to <2 x i9>
; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i9		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i9> [[TMP2]], i64 0
; CHECK-NEXT: ret i9 [[TMP3]]		; CHECK-NEXT: ret i9 [[TMP3]]
;		;
store <4 x i6> %v, ptr %loc		store <4 x i6> %v, ptr %loc
%gep = getelementptr i9, ptr %loc, i64 0		%gep = getelementptr i9, ptr %loc, i64 0
%ref = load i9, ptr %gep		%ref = load i9, ptr %gep
ret i9 %ref		ret i9 %ref
}		}


define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {		define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i4 @load_as_scalar_smaller		; CHECK-LABEL: define i4 @load_as_scalar_smaller
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i6> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i6> [[V:%.]]) {
; CHECK-NEXT: store <4 x i6> [[V]], ptr [[LOC]], align 4		; CHECK-NEXT: store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i6> [[V]], <4 x i6> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = trunc i24 [[TMP1]] to i8		; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i6> [[TMP1]] to <3 x i4>
; CHECK-NEXT: [[TMP3:%.*]] = trunc i8 [[TMP2]] to i4		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i4> [[TMP2]], i64 0
; CHECK-NEXT: ret i4 [[TMP3]]		; CHECK-NEXT: ret i4 [[TMP3]]
;		;
store <4 x i6> %v, ptr %loc		store <4 x i6> %v, ptr %loc
%gep = getelementptr i4, ptr %loc, i64 0		%gep = getelementptr i4, ptr %loc, i64 0
%ref = load i4, ptr %gep		%ref = load i4, ptr %gep
ret i4 %ref		ret i4 %ref
}		}

Show All 14 Lines	;
ret i32 %r		ret i32 %r
}		}

define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {		define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @load_vec_same_size_different_type1		; CHECK-LABEL: define i64 @load_vec_same_size_different_type1
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x i64>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: ret i64 [[R]]		; CHECK-NEXT: ret i64 [[R]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
%ref = load <2 x i64>, ptr %gep		%ref = load <2 x i64>, ptr %gep
%r = extractelement <2 x i64> %ref, i32 1		%r = extractelement <2 x i64> %ref, i32 1
ret i64 %r		ret i64 %r
}		}

define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {		define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define double @load_vec_same_size_different_type2		; CHECK-LABEL: define double @load_vec_same_size_different_type2
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <2 x double>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x double>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: ret double [[R]]		; CHECK-NEXT: ret double [[R]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
%ref = load <2 x double>, ptr %gep		%ref = load <2 x double>, ptr %gep
%r = extractelement <2 x double> %ref, i32 1		%r = extractelement <2 x double> %ref, i32 1
ret double %r		ret double %r
}		}

define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {		define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_subvector_same_type		; CHECK-LABEL: define i32 @load_subvector_same_type
; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16		; CHECK-NEXT: store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; CHECK-NEXT: ret i32 [[R]]		; CHECK-NEXT: ret i32 [[R]]
;		;
entry:		entry:
store <4 x i32> %v, ptr %loc		store <4 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
%ref = load <2 x i32>, ptr %gep		%ref = load <2 x i32>, ptr %gep
%r = extractelement <2 x i32> %ref, i32 1		%r = extractelement <2 x i32> %ref, i32 1
ret i32 %r		ret i32 %r
}		}

define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {		define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i64 @load_subvector_different_type		; CHECK-LABEL: define i64 @load_subvector_different_type
; CHECK-SAME: (ptr [[LOC:%.]], <8 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <8 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <8 x i32> [[V]], ptr [[LOC]], align 32		; CHECK-NEXT: store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; CHECK-NEXT: ret i64 [[R]]		; CHECK-NEXT: ret i64 [[R]]
;		;
entry:		entry:
store <8 x i32> %v, ptr %loc		store <8 x i32> %v, ptr %loc
%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0		%gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
%ref = load <2 x i64>, ptr %gep		%ref = load <2 x i64>, ptr %gep
%r = extractelement <2 x i64> %ref, i32 1		%r = extractelement <2 x i64> %ref, i32 1
ret i64 %r		ret i64 %r
}		}

define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {		define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i16 @load_subvector_different_type2		; CHECK-LABEL: define i16 @load_subvector_different_type2
; CHECK-SAME: (ptr [[LOC:%.]], <8 x i32> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <8 x i32> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <8 x i32> [[V]], ptr [[LOC]], align 32		; CHECK-NEXT: store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[V]], <8 x i32> poison, <1 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = trunc i256 [[TMP0]] to i32		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i32> [[TMP0]] to <2 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; CHECK-NEXT: ret i16 [[R]]		; CHECK-NEXT: ret i16 [[R]]
;		;
entry:		entry:
store <8 x i32> %v, ptr %loc		store <8 x i32> %v, ptr %loc
%gep = getelementptr [2 x i16], ptr %loc, i64 0		%gep = getelementptr [2 x i16], ptr %loc, i64 0
%ref = load <2 x i16>, ptr %gep		%ref = load <2 x i16>, ptr %gep
%r = extractelement <2 x i16> %ref, i32 1		%r = extractelement <2 x i16> %ref, i32 1
ret i16 %r		ret i16 %r
}		}

define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {		define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i4 @load_subvector_different_type3		; CHECK-LABEL: define i4 @load_subvector_different_type3
; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8		; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <6 x i4>
; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <6 x i4> [[TMP1]], <6 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <3 x i4>		; CHECK-NEXT: [[R:%.*]] = extractelement <3 x i4> [[TMP2]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <3 x i4> [[TMP3]], i32 1
; CHECK-NEXT: ret i4 [[R]]		; CHECK-NEXT: ret i4 [[R]]
;		;
entry:		entry:
store <8 x i8> %v, ptr %loc		store <8 x i8> %v, ptr %loc
%gep = getelementptr [3 x i4], ptr %loc, i64 0		%gep = getelementptr [3 x i4], ptr %loc, i64 0
%ref = load <3 x i4>, ptr %gep		%ref = load <3 x i4>, ptr %gep
%r = extractelement <3 x i4> %ref, i32 1		%r = extractelement <3 x i4> %ref, i32 1
ret i4 %r		ret i4 %r
}		}

define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {		define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i12 @load_subvector_different_type4		; CHECK-LABEL: define i12 @load_subvector_different_type4
; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8		; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <2 x i12>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i24 [[TMP1]] to <2 x i12>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i12> [[TMP1]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i12> [[TMP2]], i32 1
; CHECK-NEXT: ret i12 [[R]]		; CHECK-NEXT: ret i12 [[R]]
;		;
entry:		entry:
store <8 x i8> %v, ptr %loc		store <8 x i8> %v, ptr %loc
%gep = getelementptr [2 x i12], ptr %loc, i64 0		%gep = getelementptr [2 x i12], ptr %loc, i64 0
%ref = load <2 x i12>, ptr %gep		%ref = load <2 x i12>, ptr %gep
%r = extractelement <2 x i12> %ref, i32 1		%r = extractelement <2 x i12> %ref, i32 1
ret i12 %r		ret i12 %r
}		}

define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {		define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i6 @load_subvector_different_type5		; CHECK-LABEL: define i6 @load_subvector_different_type5
; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {		; CHECK-SAME: (ptr [[LOC:%.]], <8 x i8> [[V:%.]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8		; CHECK-NEXT: store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16		; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i8> [[TMP0]] to <4 x i6>
; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i6> [[TMP1]], <4 x i6> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <2 x i6>		; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i6> [[TMP2]], i32 1
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x i6> [[TMP3]], i32 1
; CHECK-NEXT: ret i6 [[R]]		; CHECK-NEXT: ret i6 [[R]]
;		;
entry:		entry:
store <8 x i8> %v, ptr %loc		store <8 x i8> %v, ptr %loc
%gep = getelementptr [2 x i6], ptr %loc, i64 0		%gep = getelementptr [2 x i6], ptr %loc, i64 0
%ref = load <2 x i6>, ptr %gep		%ref = load <2 x i6>, ptr %gep
%r = extractelement <2 x i6> %ref, i32 1		%r = extractelement <2 x i6> %ref, i32 1
ret i6 %r		ret i6 %r
}		}

		define i11 @load_greater_than_vector_type(ptr %loc, <4 x i6> %v) {
		; CHECK-LABEL: define i11 @load_greater_than_vector_type
		; CHECK-SAME: (ptr [[LOC:%.]], <4 x i6> [[V:%.]]) {
		; CHECK-NEXT: store <4 x i6> [[V]], ptr [[LOC]], align 4
		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i6> [[V]], <4 x i6> poison, <11 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
		; CHECK-NEXT: [[TMP2:%.*]] = bitcast <11 x i6> [[TMP1]] to <6 x i11>
		; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i11> [[TMP2]], i64 0
		; CHECK-NEXT: ret i11 [[TMP3]]
		;
		store <4 x i6> %v, ptr %loc
		%ref = load i11, ptr %loc
		ret i11 %ref
		}

declare void @f(<4 x i32>)		declare void @f(<4 x i32>)
declare void @f_no_mem(<4 x i32>) memory(none)		declare void @f_no_mem(<4 x i32>) memory(none)

This is an archive of the discontinued LLVM Phabricator instance.

[GVN] Use vector ops when doing loadCoercion on a vector valueNeeds ReviewPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 551462

llvm/lib/Transforms/Utils/VNCoercion.cpp

llvm/test/Transforms/GVN/pr63059.ll

[GVN] Use vector ops when doing loadCoercion on a vector value
Needs ReviewPublic