Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -317,6 +317,7 @@ Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); + Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt); Instruction *foldCastedBitwiseLogic(BinaryOperator &I); Instruction *narrowBinOp(TruncInst &Trunc); Instruction *narrowMaskedBinOp(BinaryOperator &And); Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -178,24 +178,43 @@ return &EI; } -static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, - InstCombiner::BuilderTy &Builder, - bool IsBigEndian) { +Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { Value *X; uint64_t ExtIndexC; if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || - !X->getType()->isVectorTy() || !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) return nullptr; + ElementCount NumElts = + cast(Ext.getVectorOperandType())->getElementCount(); + Type *DestTy = Ext.getType(); + bool IsBigEndian = DL.isBigEndian(); + + // If we are casting an integer to vector and extracting a portion, that is + // a shift-right and truncate. + // TODO: If no shift is needed, allow extra use? + // TODO: Allow FP dest type by casting the trunc to FP? + if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() && + isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) && + Ext.getVectorOperand()->hasOneUse()) { + // Big endian requires adjusting the extract index since MSB is at index 0. + // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8 + // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8 + if (IsBigEndian) + ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC; + unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits(); + Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset"); + return new TruncInst(Lshr, DestTy); + } + + if (!X->getType()->isVectorTy()) + return nullptr; + // If this extractelement is using a bitcast from a vector of the same number // of elements, see if we can find the source element from the source vector: // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] auto *SrcTy = cast(X->getType()); - Type *DestTy = Ext.getType(); ElementCount NumSrcElts = SrcTy->getElementCount(); - ElementCount NumElts = - cast(Ext.getVectorOperandType())->getElementCount(); if (NumSrcElts == NumElts) if (Value *Elt = findScalarElement(X, ExtIndexC)) return new BitCastInst(Elt, DestTy); @@ -410,7 +429,7 @@ } } - if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) + if (Instruction *I = foldBitcastExtElt(EI)) return I; // If there's a vector PHI feeding a scalar use through this extractelement Index: llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll +++ llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll @@ -36,12 +36,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -109,12 +109,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -178,12 +178,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -251,12 +251,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -320,12 +320,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -393,12 +393,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -462,12 +462,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -535,12 +535,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -924,11 +924,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -953,10 +953,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]] -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1000,11 +1000,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 @@ -1025,10 +1025,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]] -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %1, i64 0 @@ -1066,11 +1066,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1095,10 +1095,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00 -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1140,11 +1140,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 @@ -1165,10 +1165,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00 -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %1, i64 0 @@ -1206,11 +1206,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1235,10 +1235,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]] -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1280,11 +1280,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1305,10 +1305,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]] -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %a, i64 0 @@ -1348,11 +1348,11 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP9]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1381,10 +1381,10 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: ret float [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]] +; CHECK-NEXT: ret float [[TMP8]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1453,11 +1453,11 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP9]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1482,10 +1482,10 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] -; CHECK-NEXT: ret double [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]] +; CHECK-NEXT: ret double [[TMP8]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %1 @@ -1549,11 +1549,11 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP10]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1584,10 +1584,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] -; CHECK-NEXT: ret float [[TMP10]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]] +; CHECK-NEXT: ret float [[TMP9]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1660,11 +1660,11 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP10]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1691,10 +1691,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] -; CHECK-NEXT: ret double [[TMP10]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]] +; CHECK-NEXT: ret double [[TMP9]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %a Index: llvm/test/Transforms/InstCombine/X86/x86-avx512.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -36,12 +36,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -109,12 +109,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -178,12 +178,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -251,12 +251,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -320,12 +320,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -393,12 +393,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -462,12 +462,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -535,12 +535,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -924,11 +924,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -953,10 +953,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]] -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1000,11 +1000,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 @@ -1025,10 +1025,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]] -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %1, i64 0 @@ -1066,11 +1066,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1095,10 +1095,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00 -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1140,11 +1140,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1 @@ -1165,10 +1165,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00 -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %1, i64 0 @@ -1206,11 +1206,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP7]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1235,10 +1235,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]] -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]] +; CHECK-NEXT: ret float [[TMP6]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1280,11 +1280,11 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP7]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1305,10 +1305,10 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]] -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: ret double [[TMP6]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = extractelement <2 x double> %a, i64 0 @@ -1348,11 +1348,11 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP9]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1381,10 +1381,10 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: ret float [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]] +; CHECK-NEXT: ret float [[TMP8]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1453,11 +1453,11 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP9]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1482,10 +1482,10 @@ ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] -; CHECK-NEXT: ret double [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]] +; CHECK-NEXT: ret double [[TMP8]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %1 @@ -1549,11 +1549,11 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP10]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1584,10 +1584,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] -; CHECK-NEXT: ret float [[TMP10]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]] +; CHECK-NEXT: ret float [[TMP9]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1660,11 +1660,11 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP10]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1691,10 +1691,10 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] -; CHECK-NEXT: ret double [[TMP10]] +; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]] +; CHECK-NEXT: ret double [[TMP9]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %a Index: llvm/test/Transforms/InstCombine/extractelement.ll =================================================================== --- llvm/test/Transforms/InstCombine/extractelement.ll +++ llvm/test/Transforms/InstCombine/extractelement.ll @@ -330,11 +330,17 @@ ret <4 x double> %r } +; i32 is a desirable/supported type independent of data layout. + define i8 @bitcast_scalar_supported_type_index0(i32 %x) { -; ANY-LABEL: @bitcast_scalar_supported_type_index0( -; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8> -; ANY-NEXT: [[R:%.*]] = extractelement <4 x i8> [[V]], i8 0 -; ANY-NEXT: ret i8 [[R]] +; LE-LABEL: @bitcast_scalar_supported_type_index0( +; LE-NEXT: [[R:%.*]] = trunc i32 [[X:%.*]] to i8 +; LE-NEXT: ret i8 [[R]] +; +; BE-LABEL: @bitcast_scalar_supported_type_index0( +; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 24 +; BE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8 +; BE-NEXT: ret i8 [[R]] ; %v = bitcast i32 %x to <4 x i8> %r = extractelement <4 x i8> %v, i8 0 @@ -342,27 +348,41 @@ } define i8 @bitcast_scalar_supported_type_index2(i32 %x) { -; ANY-LABEL: @bitcast_scalar_supported_type_index2( -; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8> -; ANY-NEXT: [[R:%.*]] = extractelement <4 x i8> [[V]], i64 2 -; ANY-NEXT: ret i8 [[R]] +; LE-LABEL: @bitcast_scalar_supported_type_index2( +; LE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16 +; LE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8 +; LE-NEXT: ret i8 [[R]] +; +; BE-LABEL: @bitcast_scalar_supported_type_index2( +; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 8 +; BE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8 +; BE-NEXT: ret i8 [[R]] ; %v = bitcast i32 %x to <4 x i8> %r = extractelement <4 x i8> %v, i64 2 ret i8 %r } +; i64 is legal based on data layout. + define i4 @bitcast_scalar_legal_type_index3(i64 %x) { -; ANY-LABEL: @bitcast_scalar_legal_type_index3( -; ANY-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <16 x i4> -; ANY-NEXT: [[R:%.*]] = extractelement <16 x i4> [[V]], i64 3 -; ANY-NEXT: ret i4 [[R]] +; LE-LABEL: @bitcast_scalar_legal_type_index3( +; LE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 12 +; LE-NEXT: [[R:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i4 +; LE-NEXT: ret i4 [[R]] +; +; BE-LABEL: @bitcast_scalar_legal_type_index3( +; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 48 +; BE-NEXT: [[R:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i4 +; BE-NEXT: ret i4 [[R]] ; %v = bitcast i64 %x to <16 x i4> %r = extractelement <16 x i4> %v, i64 3 ret i4 %r } +; negative test - don't create a shift for an illegal type. + define i8 @bitcast_scalar_illegal_type_index1(i128 %x) { ; ANY-LABEL: @bitcast_scalar_illegal_type_index1( ; ANY-NEXT: [[V:%.*]] = bitcast i128 [[X:%.*]] to <16 x i8> @@ -374,6 +394,8 @@ ret i8 %r } +; negative test - can't use shift/trunc on FP + define i8 @bitcast_fp_index0(float %x) { ; ANY-LABEL: @bitcast_fp_index0( ; ANY-NEXT: [[V:%.*]] = bitcast float [[X:%.*]] to <4 x i8> @@ -385,6 +407,8 @@ ret i8 %r } +; negative test - can't have FP dest type without a cast + define half @bitcast_fpvec_index0(i32 %x) { ; ANY-LABEL: @bitcast_fpvec_index0( ; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <2 x half> @@ -396,6 +420,8 @@ ret half %r } +; negative test - need constant index + define i8 @bitcast_scalar_index_variable(i32 %x, i64 %y) { ; ANY-LABEL: @bitcast_scalar_index_variable( ; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8> @@ -407,6 +433,8 @@ ret i8 %r } +; negative test - no extra uses + define i8 @bitcast_scalar_index0_use(i64 %x) { ; ANY-LABEL: @bitcast_scalar_index0_use( ; ANY-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>