diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1129,27 +1129,44 @@ SDValue Lo = GetPromotedInteger(N->getOperand(1)); SDValue Amount = GetPromotedInteger(N->getOperand(2)); - unsigned OldBits = N->getOperand(0).getScalarValueSizeInBits(); - unsigned NewBits = Hi.getScalarValueSizeInBits(); - - // Shift Lo up to occupy the upper bits of the promoted type. SDLoc DL(N); + EVT OldVT = N->getOperand(0).getValueType(); EVT VT = Lo.getValueType(); - Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, - DAG.getConstant(NewBits - OldBits, DL, VT)); + unsigned Opcode = N->getOpcode(); + bool IsFSHR = Opcode == ISD::FSHR; + unsigned OldBits = OldVT.getScalarSizeInBits(); + unsigned NewBits = VT.getScalarSizeInBits(); // Amount has to be interpreted modulo the old bit width. Amount = DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT)); - unsigned Opcode = N->getOpcode(); - if (Opcode == ISD::FSHR) { - // Increase Amount to shift the result into the lower bits of the promoted - // type. - Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, - DAG.getConstant(NewBits - OldBits, DL, VT)); + // If the promoted type is twice the size (or more), then we use the + // traditional funnel 'double' shift codegen. This isn't necessary if the + // shift amount is constant. + // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw. + // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)). + if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) && + !TLI.isOperationLegalOrCustom(Opcode, VT)) { + SDValue HiShift = DAG.getConstant(OldBits, DL, VT); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift); + Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT); + SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo); + Res = DAG.getNode(IsFSHR ? 
ISD::SRL : ISD::SHL, DL, VT, Res, Amount); + if (!IsFSHR) + Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift); + return Res; } + // Shift Lo up to occupy the upper bits of the promoted type. + SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT); + Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset); + + // Increase Amount to shift the result into the lower bits of the promoted + // type. + if (IsFSHR) + Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset); + return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount); } diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll --- a/llvm/test/CodeGen/ARM/funnel-shift.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -19,13 +19,10 @@ define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { ; CHECK-LABEL: fshl_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: and r2, r2, #15 -; CHECK-NEXT: mov r3, #31 -; CHECK-NEXT: lsl r1, r1, #16 -; CHECK-NEXT: bic r3, r3, r2 -; CHECK-NEXT: lsl r0, r0, r2 -; CHECK-NEXT: lsr r1, r1, #1 -; CHECK-NEXT: orr r0, r0, r1, lsr r3 +; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16 +; CHECK-NEXT: and r1, r2, #15 +; CHECK-NEXT: lsl r0, r0, r1 +; CHECK-NEXT: lsr r0, r0, #16 ; CHECK-NEXT: bx lr %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) ret i16 %f @@ -188,15 +185,9 @@ define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { ; CHECK-LABEL: fshr_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r3, #1 -; CHECK-NEXT: lsl r0, r0, #1 -; CHECK-NEXT: bfi r2, r3, #4, #28 -; CHECK-NEXT: mov r3, #31 -; CHECK-NEXT: bic r3, r3, r2 -; CHECK-NEXT: and r2, r2, #31 -; CHECK-NEXT: lsl r1, r1, #16 -; CHECK-NEXT: lsl r0, r0, r3 -; CHECK-NEXT: orr r0, r0, r1, lsr r2 +; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16 +; CHECK-NEXT: and r1, r2, #15 +; CHECK-NEXT: lsr r0, r0, r1 ; CHECK-NEXT: bx lr %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) ret i16 %f diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll --- a/llvm/test/CodeGen/Mips/funnel-shift.ll +++ 
b/llvm/test/CodeGen/Mips/funnel-shift.ll @@ -19,15 +19,13 @@ define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { ; CHECK-LABEL: fshl_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: andi $1, $6, 15 -; CHECK-NEXT: sllv $2, $4, $1 -; CHECK-NEXT: sll $3, $5, 16 -; CHECK-NEXT: srl $3, $3, 1 -; CHECK-NEXT: not $1, $1 -; CHECK-NEXT: andi $1, $1, 31 -; CHECK-NEXT: srlv $1, $3, $1 +; CHECK-NEXT: andi $1, $5, 65535 +; CHECK-NEXT: sll $2, $4, 16 +; CHECK-NEXT: or $1, $2, $1 +; CHECK-NEXT: andi $2, $6, 15 +; CHECK-NEXT: sllv $1, $1, $2 ; CHECK-NEXT: jr $ra -; CHECK-NEXT: or $2, $2, $1 +; CHECK-NEXT: srl $2, $1, 16 %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) ret i16 %f } @@ -288,15 +286,12 @@ define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { ; CHECK-LABEL: fshr_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: sll $1, $5, 16 +; CHECK-NEXT: andi $1, $5, 65535 +; CHECK-NEXT: sll $2, $4, 16 +; CHECK-NEXT: or $1, $2, $1 ; CHECK-NEXT: andi $2, $6, 15 -; CHECK-NEXT: ori $3, $2, 16 -; CHECK-NEXT: srlv $1, $1, $3 -; CHECK-NEXT: sll $3, $4, 1 -; CHECK-NEXT: xori $2, $2, 15 -; CHECK-NEXT: sllv $2, $3, $2 ; CHECK-NEXT: jr $ra -; CHECK-NEXT: or $2, $2, $1 +; CHECK-NEXT: srlv $2, $1, $2 %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) ret i16 %f } diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll --- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll @@ -109,14 +109,13 @@ define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { ; RV64I-LABEL: fshl_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a2, a2, 31 -; RV64I-NEXT: sll a0, a0, a2 -; RV64I-NEXT: not a2, a2 +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a1, a1, 1 -; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: andi a1, a2, 31 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: srai a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64IB-LABEL: fshl_i32: @@ -162,14 +161,12 
@@ define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind { ; RV64I-LABEL: fshr_i32: ; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: andi a2, a2, 31 -; RV64I-NEXT: ori a3, a2, 32 -; RV64I-NEXT: srl a1, a1, a3 -; RV64I-NEXT: slli a0, a0, 1 -; RV64I-NEXT: xori a2, a2, 31 -; RV64I-NEXT: sll a0, a0, a2 +; RV64I-NEXT: srli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: andi a1, a2, 31 +; RV64I-NEXT: srl a0, a0, a1 ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ;