diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -1401,3 +1401,139 @@ %2 = bitcast<8 x i16> %1 to <4 x i32> ret <4 x i32> %2 } + +define <8 x float> @test_splat_multiuseW(<8 x float>* %vp) { +; P9-LABEL: test_splat_multiuseW: +; P9: # %bb.0: # %entry +; P9-NEXT: addis r4, r2, .LCPI26_0@toc@ha +; P9-NEXT: lxv v2, 0(r3) +; P9-NEXT: lxv v3, 16(r3) +; P9-NEXT: addi r4, r4, .LCPI26_0@toc@l +; P9-NEXT: lxv v4, 0(r4) +; P9-NEXT: vperm v3, v3, v2, v4 +; P9-NEXT: lxvwsx v2, 0, r3 +; P9-NEXT: blr +; +; P8-LABEL: test_splat_multiuseW: +; P8: # %bb.0: # %entry +; P8-NEXT: li r4, 16 +; P8-NEXT: addis r5, r2, .LCPI26_0@toc@ha +; P8-NEXT: lxvd2x vs1, 0, r3 +; P8-NEXT: lxvd2x vs0, r3, r4 +; P8-NEXT: addi r4, r5, .LCPI26_0@toc@l +; P8-NEXT: lxvd2x vs2, 0, r4 +; P8-NEXT: xxswapd v4, vs1 +; P8-NEXT: xxswapd v2, vs0 +; P8-NEXT: xxswapd v3, vs2 +; P8-NEXT: vperm v3, v4, v2, v3 +; P8-NEXT: xxspltw v2, v4, 3 +; P8-NEXT: blr +; +; P7-LABEL: test_splat_multiuseW: +; P7: # %bb.0: # %entry +; P7-NEXT: li r4, 16 +; P7-NEXT: addis r5, r2, .LCPI26_0@toc@ha +; P7-NEXT: lxvw4x v4, 0, r3 +; P7-NEXT: lxvw4x v2, r3, r4 +; P7-NEXT: addi r4, r5, .LCPI26_0@toc@l +; P7-NEXT: lxvw4x v3, 0, r4 +; P7-NEXT: vperm v3, v4, v2, v3 +; P7-NEXT: xxspltw v2, v4, 0 +; P7-NEXT: blr +; +; P9-AIX32-LABEL: test_splat_multiuseW: +; P9-AIX32: # %bb.0: # %entry +; P9-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0 +; P9-AIX32-NEXT: lxv v2, 0(r3) +; P9-AIX32-NEXT: lxv v3, 16(r3) +; P9-AIX32-NEXT: lxv v4, 0(r4) +; P9-AIX32-NEXT: vperm v3, v3, v2, v4 +; P9-AIX32-NEXT: lxvwsx v2, 0, r3 +; P9-AIX32-NEXT: blr +; +; P8-AIX32-LABEL: test_splat_multiuseW: +; P8-AIX32: # %bb.0: # %entry +; P8-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0 +; P8-AIX32-NEXT: li r5, 16 +; P8-AIX32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX32-NEXT: lxvw4x v2, r3, r5 +; P8-AIX32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX32-NEXT: vperm v3, v2, v4, v3 +; P8-AIX32-NEXT: xxspltw v2, v4, 0 +; P8-AIX32-NEXT: blr +; +; P7-AIX32-LABEL: test_splat_multiuseW: +; P7-AIX32: # %bb.0: # %entry +; P7-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0 +; P7-AIX32-NEXT: li r5, 16 +; P7-AIX32-NEXT: lxvw4x v4, 0, r3 +; P7-AIX32-NEXT: lxvw4x v2, r3, r5 +; P7-AIX32-NEXT: lxvw4x v3, 0, r4 +; P7-AIX32-NEXT: vperm v3, v4, v2, v3 +; P7-AIX32-NEXT: xxspltw v2, v4, 0 +; P7-AIX32-NEXT: blr +entry: + %vec = load <8 x float>, <8 x float>* %vp + %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> + ret <8 x float> %res +} + +define <4 x double> @test_splat_multiuseD(<4 x double>* %vp) { +; P9-LABEL: test_splat_multiuseD: +; P9: # %bb.0: # %entry +; P9-NEXT: lxv vs0, 0(r3) +; P9-NEXT: lxv vs1, 16(r3) +; P9-NEXT: xxmrghd v3, vs1, vs0 +; P9-NEXT: xxspltd v2, vs0, 0 +; P9-NEXT: blr +; +; P8-LABEL: test_splat_multiuseD: +; P8: # %bb.0: # %entry +; P8-NEXT: li r4, 16 +; P8-NEXT: lxvd2x vs1, 0, r3 +; P8-NEXT: lxvd2x vs0, r3, r4 +; P8-NEXT: xxswapd vs2, vs1 +; P8-NEXT: xxspltd v2, vs1, 0 +; P8-NEXT: xxswapd vs0, vs0 +; P8-NEXT: xxmrgld v3, vs2, vs0 +; P8-NEXT: blr +; +; P7-LABEL: test_splat_multiuseD: +; P7: # %bb.0: # %entry +; P7-NEXT: li r4, 16 +; P7-NEXT: lxvd2x vs1, 0, r3 +; P7-NEXT: lxvd2x vs0, r3, r4 +; P7-NEXT: xxspltd v2, vs1, 0 +; P7-NEXT: xxmrghd v3, vs0, vs1 +; P7-NEXT: blr +; +; P9-AIX32-LABEL: test_splat_multiuseD: +; P9-AIX32: # %bb.0: # %entry +; P9-AIX32-NEXT: lxv vs0, 0(r3) +; P9-AIX32-NEXT: lxv vs1, 16(r3) +; P9-AIX32-NEXT: xxmrghd v3, vs1, vs0 +; P9-AIX32-NEXT: xxmrghd v2, vs0, vs0 +; P9-AIX32-NEXT: blr +; +; P8-AIX32-LABEL: test_splat_multiuseD: +; P8-AIX32: # %bb.0: # %entry +; P8-AIX32-NEXT: li r4, 16 +; P8-AIX32-NEXT: lxvd2x vs1, 0, r3 +; P8-AIX32-NEXT: lxvd2x vs0, r3, r4 +; P8-AIX32-NEXT: xxmrghd v2, vs1, vs1 +; P8-AIX32-NEXT: xxmrghd v3, vs0, vs1 +; P8-AIX32-NEXT: blr +; +; P7-AIX32-LABEL: test_splat_multiuseD: +; P7-AIX32: # %bb.0: # %entry +; P7-AIX32-NEXT: li r4, 16 +; P7-AIX32-NEXT: lxvd2x vs1, 0, r3 +; P7-AIX32-NEXT: lxvd2x vs0, r3, r4 +; P7-AIX32-NEXT: xxmrghd v2, vs1, vs1 +; P7-AIX32-NEXT: xxmrghd v3, vs0, vs1 +; P7-AIX32-NEXT: blr +entry: + %vec = load <4 x double>, <4 x double>* %vp + %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> + ret <4 x double> %res +}