Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -301,6 +301,10 @@
       /// of outputs.
       XXSWAPD,
 
+      /// An SDNode for swaps that are not associated with any loads/stores
+      /// and thereby have no chain.
+      SWAP_NO_CHAIN,
+
       /// QVFPERM = This corresponds to the QPX qvfperm instruction.
       QVFPERM,
 
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1081,6 +1081,7 @@
   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
+  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
@@ -7392,6 +7393,14 @@
                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
     }
+
+    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
+    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
+      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
+    }
+
   }
 
   if (Subtarget.hasQPX()) {
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -66,6 +66,7 @@
 def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
 def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
 def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
+def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
 
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                     string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -947,6 +948,7 @@
 def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
 
 // Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), @@ -1302,7 +1304,7 @@ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); // Word extraction - dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); @@ -1572,7 +1574,7 @@ def : Pat<(f32 (vector_extract v4f32:$S, 1)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), @@ -1677,7 +1679,7 @@ def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), Index: test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll =================================================================== --- test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll +++ test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +define <4 x i32> @test1(<4 x i32> %a) { +entry: +; CHECK-LABEL: test1 +; CHECK: xxswapd 34, 34 + %vecins6 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + ret <4 x i32> %vecins6 +} + +define <8 x i16> @test2(<8 x i16> %a) #0 { +entry: +; CHECK-LABEL: test2 +; CHECK: xxswapd 34, 
34 + %vecins14 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + ret <8 x i16> %vecins14 +} + +define <16 x i8> @test3(<16 x i8> %a) #0 { +entry: +; CHECK-LABEL: test3 +; CHECK: xxswapd 34, 34 + %vecins30 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %vecins30 +} Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll =================================================================== --- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1102,7 +1102,7 @@ ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: extsw 3, 3 ; CHECK-LE-LABEL: @getsi0 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] ; CHECK-LE: extsw 3, 3 } @@ -1150,7 +1150,7 @@ %vecext = extractelement <4 x i32> %0, i32 3 ret i32 %vecext ; CHECK-LABEL: @getsi3 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: extsw 3, 3 ; CHECK-LE-LABEL: @getsi3 @@ -1172,7 +1172,7 @@ ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui0 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] ; CHECK-LE: clrldi 3, 3, 32 } @@ -1220,7 +1220,7 @@ %vecext = extractelement <4 x i32> %0, i32 3 ret i32 %vecext ; CHECK-LABEL: @getui3 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui3 @@ -1380,7 +1380,7 @@ ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK: xscvspdpn 1, [[SHL]] ; CHECK-LE-LABEL: @getf1 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: xscvspdpn 1, [[SHL]] } @@ -1393,7 +1393,7 @@ %vecext = extractelement <4 x float> %0, i32 2 ret float %vecext ; CHECK-LABEL: @getf2 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: 
xscvspdpn 1, [[SHL]] ; CHECK-LE-LABEL: @getf2 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1