Index: clang/lib/Headers/altivec.h =================================================================== --- clang/lib/Headers/altivec.h +++ clang/lib/Headers/altivec.h @@ -8043,45 +8043,49 @@ /* vec_sl */ -static __inline__ vector signed char __ATTRS_o_ai -vec_sl(vector signed char __a, vector unsigned char __b) { - return __a << (vector signed char)__b; -} - static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b) { - return __a << __b; + return __a << (__b % + (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__)); } -static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a, - vector unsigned short __b) { - return __a << (vector short)__b; +static __inline__ vector signed char __ATTRS_o_ai +vec_sl(vector signed char __a, vector unsigned char __b) { + return (vector signed char)vec_sl((vector unsigned char)__a, __b); } static __inline__ vector unsigned short __ATTRS_o_ai vec_sl(vector unsigned short __a, vector unsigned short __b) { - return __a << __b; + return __a << (__b % (vector unsigned short)(sizeof(unsigned short) * + __CHAR_BIT__)); } -static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a, - vector unsigned int __b) { - return __a << (vector int)__b; +static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a, + vector unsigned short __b) { + return (vector short)vec_sl((vector unsigned short)__a, __b); } static __inline__ vector unsigned int __ATTRS_o_ai vec_sl(vector unsigned int __a, vector unsigned int __b) { - return __a << __b; + return __a << (__b % + (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__)); } -#ifdef __POWER8_VECTOR__ -static __inline__ vector signed long long __ATTRS_o_ai -vec_sl(vector signed long long __a, vector unsigned long long __b) { - return __a << (vector long long)__b; +static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a, + vector unsigned int __b) { + return (vector int)vec_sl((vector unsigned int)__a, __b); } 
+#ifdef __POWER8_VECTOR__ static __inline__ vector unsigned long long __ATTRS_o_ai vec_sl(vector unsigned long long __a, vector unsigned long long __b) { - return __a << __b; + return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) * + __CHAR_BIT__)); +} + +static __inline__ vector long long __ATTRS_o_ai +vec_sl(vector long long __a, vector unsigned long long __b) { + return (vector long long)vec_sl((vector unsigned long long)__a, __b); } #endif Index: clang/test/CodeGen/builtins-ppc-altivec.c =================================================================== --- clang/test/CodeGen/builtins-ppc-altivec.c +++ clang/test/CodeGen/builtins-ppc-altivec.c @@ -3419,28 +3419,40 @@ /* vec_sl */ res_vsc = vec_sl(vsc, vuc); -// CHECK: shl <16 x i8> -// CHECK-LE: shl <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sl(vuc, vuc); -// CHECK: shl <16 x i8> -// CHECK-LE: shl <16 x i8> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sl(vs, vus); -// CHECK: shl <8 x i16> -// CHECK-LE: shl <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sl(vus, vus); -// CHECK: shl <8 x i16> -// CHECK-LE: shl <8 x i16> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> 
{{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sl(vi, vui); -// CHECK: shl <4 x i32> -// CHECK-LE: shl <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sl(vui, vui); -// CHECK: shl <4 x i32> -// CHECK-LE: shl <4 x i32> +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vslb(vsc, vuc); // CHECK: shl <16 x i8> Index: llvm/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/include/llvm/Target/TargetLowering.h +++ llvm/include/llvm/Target/TargetLowering.h @@ -1903,6 +1903,14 @@ return false; } + // Return true if the instruction that performs a << b actually performs + // a << (b % (sizeof(a) * 8)). 
+ virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const { + assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) && + "Expect a shift instruction"); + return false; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4651,6 +4651,17 @@ } } + // If the target supports masking y in (shl, y), + // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y) + if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) && + N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 @@ -4870,6 +4881,17 @@ EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (sra, y), + // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y) + if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) && + N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // Arithmetic shifting an all-sign-bit value is a no-op. 
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; @@ -5000,6 +5022,17 @@ EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (srl, y), + // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y) + if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) && + N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -996,6 +996,13 @@ SDValue combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + + bool supportsModuloShift(ISD::NodeType Inst, + EVT ReturnType) const override { + assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) && + "Expect a shift instruction"); + return true; + } }; namespace PPC { Index: llvm/test/CodeGen/PowerPC/shift_mask.ll =================================================================== --- llvm/test/CodeGen/PowerPC/shift_mask.ll +++ llvm/test/CodeGen/PowerPC/shift_mask.ll @@ -5,7 +5,6 @@ define i8 @test000(i8 %a, i8 %b) { ; CHECK-LABEL: test000: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -16,7 +15,6 @@ define i16 @test001(i16 %a, i16 %b) { ; CHECK-LABEL: test001: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -27,7 +25,6 @@ define i32 @test002(i32 %a, i32 %b) { ; CHECK-LABEL: test002: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and 
i32 %b, 31 @@ -38,7 +35,6 @@ define i64 @test003(i64 %a, i64 %b) { ; CHECK-LABEL: test003: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 ; CHECK-NEXT: sld 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 @@ -49,8 +45,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test010: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslb 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, @@ -61,8 +55,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test011: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslh 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, @@ -73,10 +65,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test012: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, @@ -87,11 +75,6 @@ define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test013: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsld 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b, @@ -103,7 +86,6 @@ ; CHECK-LABEL: test100: ; CHECK: # BB#0: ; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -115,7 +97,6 @@ ; CHECK-LABEL: test101: ; CHECK: # BB#0: ; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -126,7 +107,6 @@ define i32 @test102(i32 %a, i32 %b) { ; CHECK-LABEL: test102: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i32 %b, 31 @@ -137,7 +117,6 
@@ define i64 @test103(i64 %a, i64 %b) { ; CHECK-LABEL: test103: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 ; CHECK-NEXT: srd 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 @@ -148,8 +127,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test110: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrb 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, @@ -160,8 +137,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test111: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrh 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, @@ -172,10 +147,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test112: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, @@ -186,11 +157,6 @@ define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test113: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrd 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b, @@ -202,7 +168,6 @@ ; CHECK-LABEL: test200: ; CHECK: # BB#0: ; CHECK-NEXT: extsb 3, 3 -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -214,7 +179,6 @@ ; CHECK-LABEL: test201: ; CHECK: # BB#0: ; CHECK-NEXT: extsh 3, 3 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -225,7 +189,6 @@ define i32 @test202(i32 %a, i32 %b) { ; CHECK-LABEL: test202: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i32 %b, 31 @@ -236,7 +199,6 @@ define i64 @test203(i64 %a, i64 
%b) { ; CHECK-LABEL: test203: ; CHECK: # BB#0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 ; CHECK-NEXT: srad 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 @@ -247,8 +209,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test210: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrab 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, @@ -259,8 +219,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test211: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrah 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, @@ -271,10 +229,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test212: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsraw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, @@ -285,11 +239,6 @@ define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test213: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrad 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b,