diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2521,18 +2521,6 @@
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
-  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
-  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
-            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
 
   def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
              (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
@@ -2570,6 +2558,35 @@
             (STXVRDX $src, xoaddr:$dst)>;
 }
 
+// FIXME: The swap is overkill when the shift amount is a constant.
+// We should just fix the constant in the DAG.
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA,
+                     (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
+                               (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+}
+
 class xxevalPattern <dag pattern, bits<8> imm> :
   Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
 
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
--- a/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -13,6 +13,7 @@
 define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vslq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vslq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -24,6 +25,7 @@
 define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsrq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsrq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -35,6 +37,7 @@
 define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsraq:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsraq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -46,6 +49,7 @@
 define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vslq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vslq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -56,6 +60,7 @@
 define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsrq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsrq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
@@ -66,6 +71,7 @@
 define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
 ; CHECK-LABEL: test_vec_vsraq2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd v3, v3
 ; CHECK-NEXT:    vsraq v2, v2, v3
 ; CHECK-NEXT:    blr
 entry: