Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5697,12 +5697,23 @@
     // When X == Y, this is rotate. Create the node directly if legal.
     // TODO: This should also be done if the operation is custom, but we have
     // to make sure targets are handling the modulo shift amount as expected.
-    // TODO: If the rotate direction (left or right) corresponding to the shift
-    // is not available, adjust the shift value and invert the direction.
-    auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
-    if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
-      setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
-      return nullptr;
+    if (X == Y) {
+      auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+      if (TLI.isOperationLegal(RotateOpcode, VT)) {
+        setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+        return nullptr;
+      }
+      // The matching direction is not legal. Try the opposite direction.
+      RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
+      if (TLI.isOperationLegal(RotateOpcode, VT) &&
+          isPowerOf2_32(VT.getScalarSizeInBits())) {
+        // Negate the shift amount. This is only done for power-of-2 bit
+        // widths, where it is safe to ignore the high bits of the amount.
+        SDValue Zero = DAG.getConstant(0, sdl, VT);
+        SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+        setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
+        return nullptr;
+      }
     }
 
     // Get the shift amount and inverse shift amount, modulo the bit-width.
Index: test/CodeGen/AArch64/funnel-shift-rot.ll
===================================================================
--- test/CodeGen/AArch64/funnel-shift-rot.ll
+++ test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -56,8 +56,7 @@
 define i32 @rotl_i32(i32 %x, i32 %z) {
 ; CHECK-LABEL: rotl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, wzr, #0x20
-; CHECK-NEXT:    sub w8, w8, w1
+; CHECK-NEXT:    neg w8, w1
 ; CHECK-NEXT:    ror w0, w0, w8
 ; CHECK-NEXT:    ret
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
@@ -67,11 +66,8 @@
 define i64 @rotl_i64(i64 %x, i64 %z) {
 ; CHECK-LABEL: rotl_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w9, wzr, #0x40
-; CHECK-NEXT:    sub w9, w9, w1
-; CHECK-NEXT:    lsl x8, x0, x1
-; CHECK-NEXT:    lsr x9, x0, x9
-; CHECK-NEXT:    orr x0, x8, x9
+; CHECK-NEXT:    neg x8, x1
+; CHECK-NEXT:    ror x0, x0, x8
 ; CHECK-NEXT:    ret
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
   ret i64 %f
Index: test/CodeGen/PowerPC/funnel-shift-rot.ll
===================================================================
--- test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -146,8 +146,7 @@
 define i32 @rotr_i32(i32 %x, i32 %z) {
 ; CHECK-LABEL: rotr_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 32
-; CHECK-NEXT:    clrlwi 4, 4, 27
+; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    rlwnm 3, 3, 4, 0, 31
 ; CHECK-NEXT:    blr
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
@@ -157,9 +156,8 @@
 define i64 @rotr_i64(i64 %x, i64 %z) {
 ; CHECK-LABEL: rotr_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 64
-; CHECK-NEXT:    rlwinm 4, 4, 0, 26, 31
-; CHECK-NEXT:    rotld 3, 3, 4
+; CHECK-NEXT:    neg 4, 4
+; CHECK-NEXT:    rldcl 3, 3, 4, 0
 ; CHECK-NEXT:    blr
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
   ret i64 %f