diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14515,18 +14515,15 @@
     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                             : SToVRHS.getValueType().getVectorNumElements();
     int NumEltsOut = ShuffV.size();
-    unsigned InElemSizeInBits =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits();
-    unsigned OutElemSizeInBits = SToVLHS
-                                     ? LHS.getValueType().getScalarSizeInBits()
-                                     : RHS.getValueType().getScalarSizeInBits();
-
     // The width of the "valid lane" (i.e. the lane that contains the value that
     // is vectorized) needs to be expressed in terms of the number of elements
     // of the shuffle. It is thereby the ratio of the values before and after
     // any bitcast.
-    unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+    unsigned ValidLaneWidth =
+        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+                      LHS.getValueType().getScalarSizeInBits()
+                : SToVRHS.getValueType().getScalarSizeInBits() /
+                      RHS.getValueType().getScalarSizeInBits();
 
     // Initially assume that neither input is permuted. These will be adjusted
     // accordingly if either input is.
@@ -14539,9 +14536,10 @@
     // ISD::SCALAR_TO_VECTOR.
     // On big endian systems, this only makes sense for element sizes smaller
     // than 64 bits since for 64-bit elements, all instructions already put
-    // the value into element zero.
+    // the value into element zero. Since scalar size of LHS and RHS may differ
+    // after isScalarToVec, this should be checked using their own sizes.
     if (SToVLHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       // Set up the values for the shuffle vector fixup.
       LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14551,7 +14549,7 @@
       LHS = SToVLHS;
     }
     if (SToVRHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       RHSMinIdx = NumEltsOut;
       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,36 @@
   ret double %vecext
 ; FIXME: add check patterns when variable element extraction is implemented
 }
+
+; To check when LHS is i32 to vector and RHS is i64 to vector,
+; the combination should be skipped properly.
+define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
+; CHECK-LABEL: buildi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 32
+; CHECK-NEXT:    mtfprd f1, r3
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprwz f0, r4
+; CHECK-LE-NEXT:    mtfprd f1, r3
+; CHECK-LE-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    sldi 4, 4, 32
+; CHECK-AIX-NEXT:    mtfprd 1, 3
+; CHECK-AIX-NEXT:    mtfprd 0, 4
+; CHECK-AIX-NEXT:    xxmrghd 34, 0, 1
+; CHECK-AIX-NEXT:    blr
+entry:
+  %lhs.i32 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+  %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+  %lhs = bitcast <4 x i32> %lhs.i32 to <2 x i64>
+  %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}