diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14515,18 +14515,15 @@
   int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                           : SToVRHS.getValueType().getVectorNumElements();
   int NumEltsOut = ShuffV.size();
-  unsigned InElemSizeInBits =
-      SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
-              : SToVRHS.getValueType().getScalarSizeInBits();
-  unsigned OutElemSizeInBits = SToVLHS
-                                   ? LHS.getValueType().getScalarSizeInBits()
-                                   : RHS.getValueType().getScalarSizeInBits();
-
   // The width of the "valid lane" (i.e. the lane that contains the value that
   // is vectorized) needs to be expressed in terms of the number of elements
   // of the shuffle. It is thereby the ratio of the values before and after
   // any bitcast.
-  unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+  unsigned ValidLaneWidth =
+      SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+                    LHS.getValueType().getScalarSizeInBits()
+              : SToVRHS.getValueType().getScalarSizeInBits() /
+                    RHS.getValueType().getScalarSizeInBits();
 
   // Initially assume that neither input is permuted. These will be adjusted
   // accordingly if either input is.
@@ -14541,7 +14538,7 @@
   // than 64 bits since for 64-bit elements, all instructions already put
   // the value into element zero.
   if (SToVLHS) {
-    if (!IsLittleEndian && InElemSizeInBits >= 64)
+    if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
       return Res;
     // Set up the values for the shuffle vector fixup.
     LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14551,7 +14548,7 @@
     LHS = SToVLHS;
   }
   if (SToVRHS) {
-    if (!IsLittleEndian && InElemSizeInBits >= 64)
+    if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
       return Res;
     RHSMinIdx = NumEltsOut;
     RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,53 @@
   ret double %vecext
 ; FIXME: add check patterns when variable element extraction is implemented
 }
+
+define <4 x i32> @buildi2(i32 %rk25, i32 %rk26) {
+; CHECK-LABEL: buildi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtfprwz f1, r3
+; CHECK-NEXT:    lfdx f0, 0, r3
+; CHECK-NEXT:    mtvsrwz v3, r4
+; CHECK-NEXT:    xxspltw vs2, vs1, 1
+; CHECK-NEXT:    xxsldwi vs1, vs1, vs2, 1
+; CHECK-NEXT:    xxsldwi v2, vs1, vs0, 3
+; CHECK-NEXT:    vmrglw v3, v2, v3
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    xxsldwi v2, vs0, v3, 2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    addis r5, r2, .LCPI82_0@toc@ha
+; CHECK-LE-NEXT:    lxsdx v3, 0, r3
+; CHECK-LE-NEXT:    mtvsrwz v4, r3
+; CHECK-LE-NEXT:    addis r3, r2, .LCPI82_1@toc@ha
+; CHECK-LE-NEXT:    addi r5, r5, .LCPI82_0@toc@l
+; CHECK-LE-NEXT:    addi r3, r3, .LCPI82_1@toc@l
+; CHECK-LE-NEXT:    lvx v2, 0, r5
+; CHECK-LE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-LE-NEXT:    lvx v3, 0, r3
+; CHECK-LE-NEXT:    mtvsrwz v4, r4
+; CHECK-LE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mtfprwz 1, 3
+; CHECK-AIX-NEXT:    lfdx 0, 0, 3
+; CHECK-AIX-NEXT:    mtvsrwz 35, 4
+; CHECK-AIX-NEXT:    xxspltw 2, 1, 1
+; CHECK-AIX-NEXT:    xxsldwi 1, 1, 2, 1
+; CHECK-AIX-NEXT:    xxsldwi 34, 1, 0, 3
+; CHECK-AIX-NEXT:    vmrglw 3, 2, 3
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    xxsldwi 34, 0, 35, 2
+; CHECK-AIX-NEXT:    blr
+entry:
+  %rk29 = load <2 x i32>, <2 x i32>* undef, align 4
+  %rk39 = insertelement <4 x i32> poison, i32 %rk25, i32 0
+  %rk40 = shufflevector <2 x i32> %rk29, <2 x i32> poison, <4 x i32>
+  %rk41 = shufflevector <4 x i32> %rk39, <4 x i32> %rk40, <4 x i32>
+  %rk42 = insertelement <4 x i32> %rk41, i32 %rk26, i32 3
+  ret <4 x i32> %rk42
+}