Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -33128,6 +33128,37 @@ return Ret; } +static SDValue getShiftDoubleNode(SDNode *N, SelectionDAG &DAG, unsigned Opc, + EVT VT, EVT OpVT, const SDValue &Op0, + const SDValue &Op1, const SDValue &ShAmt) { + SDLoc DL(N); + unsigned Bits = OpVT.getScalarSizeInBits(); + SDValue ShAmtI8 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt); + if ((VT == MVT::i64 || VT == MVT::i32) && VT == OpVT) { + return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmtI8); + } else if (OpVT == MVT::i16) { + SDValue Op0I16 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op0); + SDValue Op1I16 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op1); + SDValue ShDbl = DAG.getNode(Opc, DL, OpVT, Op0I16, Op1I16, ShAmtI8); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShDbl); + } else if (OpVT == MVT::i8) { + + if (Opc == X86ISD::SHLD) { + SDValue Op1ShAmt = + DAG.getConstant(VT.getSizeInBits() - Bits, DL, MVT::i8); + SDValue Op1Shl = DAG.getNode(ISD::SHL, DL, VT, Op1, Op1ShAmt); + return DAG.getNode(Opc, DL, VT, Op0, Op1Shl, ShAmtI8); + } else if (Opc == X86ISD::SHRD) { + SDValue Op0ShAmt = + DAG.getConstant(VT.getSizeInBits() - Bits, DL, MVT::i8); + SDValue Op0Shl = DAG.getNode(ISD::SHL, DL, VT, Op0, Op0ShAmt); + SDValue Op0Shrd = DAG.getNode(Opc, DL, VT, Op0Shl, Op1, ShAmtI8); + return DAG.getNode(ISD::SRL, DL, VT, Op0Shrd, Op0ShAmt); + } + } + return SDValue(); +} + static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -33199,21 +33230,53 @@ SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { SDValue ShAmt1Op1 = ShAmt1.getOperand(1); - if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE) + if ((ShAmt1Op1.getOpcode() == ISD::TRUNCATE) || + (ShAmt1Op1.getOpcode() == ISD::ANY_EXTEND)) ShAmt1Op1 = ShAmt1Op1.getOperand(0); - if 
(SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) - return DAG.getNode(Opc, DL, VT, - Op0, Op1, - DAG.getNode(ISD::TRUNCATE, DL, - MVT::i8, ShAmt0)); + if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) { + return DAG.getNode(Opc, DL, VT, Op0, Op1, + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + } + if ((Op0.getOpcode() == ISD::AssertZext && + Op1.getOpcode() == ISD::AssertZext) && + ShAmt1Op1 == ShAmt0) { + // Op0 is ZEXT(Y, i16) + SDValue Op0Zext = Op0.getOperand(1); + VTSDNode *Op0VT = cast<VTSDNode>(Op0Zext); + // Op1 is ZEXT(X, i16) + SDValue Op1Zext = Op1.getOperand(1); + VTSDNode *Op1VT = cast<VTSDNode>(Op1Zext); + + if (Op0VT && Op1VT && (Op0VT->getVT() == Op1VT->getVT()) && + (Op0VT->getVT() == MVT::i16 || Op0VT->getVT() == MVT::i8)) { + return getShiftDoubleNode(N, DAG, Opc, VT, Op0VT->getVT(), Op0, Op1, + ShAmt0); + } + } + if ((Op0.getOpcode() == ISD::LOAD && Op1.getOpcode() == ISD::LOAD) && + ShAmt1Op1 == ShAmt0) { + LoadSDNode *Op0Ld = cast<LoadSDNode>(Op0); + LoadSDNode *Op1Ld = cast<LoadSDNode>(Op1); + EVT Op0LdVT = Op0Ld->getMemoryVT(); + EVT Op1LdVT = Op1Ld->getMemoryVT(); + + // If this is a ZEXTLoad. 
+ if (Op0Ld && Op1Ld && ((Op0LdVT == MVT::i8) || (Op0LdVT == MVT::i16)) && + (Op0LdVT == Op1LdVT) && + (ISD::LoadExtType::ZEXTLOAD == Op0Ld->getExtensionType()) && + (ISD::LoadExtType::ZEXTLOAD == Op1Ld->getExtensionType())) + + { + return getShiftDoubleNode(N, DAG, Opc, VT, Op0LdVT, Op0, Op1, ShAmt0); + } + } + //----------------------------------------- } } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits) - return DAG.getNode(Opc, DL, VT, - N0.getOperand(0), N1.getOperand(0), - DAG.getNode(ISD::TRUNCATE, DL, - MVT::i8, ShAmt0)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); } else if (ShAmt1.getOpcode() == ISD::XOR) { SDValue Mask = ShAmt1.getOperand(1); if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) { @@ -33232,7 +33295,126 @@ if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD && Op1.getOperand(0) == Op1.getOperand(1)) { return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), - DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + } + } + // ShAmt1 node can be: XOR( ZEXT(C, i16), MASK) or it can be: XOR( C, + // MASK) + SDValue ShAmt1Zext = ShAmt1.getOperand(0); + if (ShAmt1Zext.getOpcode() == ISD::TRUNCATE) + ShAmt1Zext = ShAmt1Zext.getOperand(0); + + SDValue ShAmt1Const = ShAmt1.getOperand(1); + ConstantSDNode *XorMaskConst = dyn_cast<ConstantSDNode>(ShAmt1Const); + + // Op1 node is either SHL( ZEXT(Y, i16), 1) or SRL( ZEXT(Y, i16), 1) or + // SHL( LOAD(Y, i16), 1) + SDValue Op1Zext = Op1.getOperand(0); + SDValue Op1Const = Op1.getOperand(1); + ConstantSDNode *OneConst = dyn_cast<ConstantSDNode>(Op1Const); + + // Check for operands that were promoted to integer + if ((Op0.getOpcode() == ISD::AssertZext) && + (Op1.getOpcode() == ISD::SHL || Op1.getOpcode() == ISD::SRL) && + XorMaskConst && OneConst && (OneConst->getSExtValue() == 1) && + 
(Op1Zext.getOpcode() == ISD::AssertZext)) { + // Op0 node is ZEXT( X, i16) + // SDValue OpX = Op0.getOperand(0); // X (bits to shift) operand + SDValue Op0Prom = + Op0.getOperand(1); // X operand type before integer promotion + VTSDNode *Op0PromVT = cast<VTSDNode>(Op0Prom); + + // ShAmt0 node can be: ZEXT( C, i16) or it can be CopyFromReg(C) + EVT ShAmt0VT; + SDValue ShAmt0Prom; // C (shift amount) operand + if (ShAmt0.getOpcode() == ISD::AssertZext) { + ShAmt0Prom = ShAmt0.getOperand(0); // C (shift amount) operand + SDValue ShAmt0PromType = + ShAmt0.getOperand(1); // C operand type before integer promotion + VTSDNode *ShAmt0PromVT = cast<VTSDNode>(ShAmt0PromType); + ShAmt0VT = ShAmt0PromVT->getVT(); + } else { + ShAmt0Prom = ShAmt0; + ShAmt0VT = ShAmt0.getValueType(); + } + + // Op1Zext is ZEXT(Y, i16) + // SDValue OpY = Op1Zext.getOperand(0); // Y (bits source) operand + SDValue Op1Prom = + Op1Zext.getOperand(1); // Y operand type before integer promotion + VTSDNode *Op1PromVT = cast<VTSDNode>(Op1Prom); + + // ShAmt1Zext node is ZEXT( C, i16) or it can be CopyFromReg(C) + EVT ShAmt1VT; + SDValue ShAmt1Prom; // C (shift amount) operand + if (ShAmt1Zext.getOpcode() == ISD::AssertZext) { + ShAmt1Prom = ShAmt1Zext.getOperand(0); // C (shift amount) operand + SDValue ShAmt1PromType = ShAmt1Zext.getOperand( + 1); // C operand type before integer promotion + VTSDNode *ShAmt1PromVT = cast<VTSDNode>(ShAmt1PromType); + ShAmt1VT = ShAmt1PromVT->getVT(); + } else { + ShAmt1Prom = ShAmt1Zext; // C (shift amount) operand + ShAmt1VT = ShAmt1Zext.getValueType(); + } + + if (Op0PromVT && Op1PromVT && + (Op0PromVT->getVT() == Op1PromVT->getVT()) && + (ShAmt0VT == ShAmt1VT) && (ShAmt0Prom == ShAmt1Prom) && + (Op0PromVT->getVT() == MVT::i16 || Op0PromVT->getVT() == MVT::i8)) { + unsigned OpXBits = Op0PromVT->getVT().getSizeInBits(); + if (XorMaskConst->getSExtValue() == (OpXBits - 1)) { + return getShiftDoubleNode(N, DAG, Opc, VT, Op0PromVT->getVT(), Op0, + Op1Zext, ShAmt0); + } + } + } + // Op1 node is either SHL( 
LOAD(Y, i16), 1) or SRL( LOAD(Y, i16), 1) + SDValue Op1Load = Op1.getOperand(0); + + // ShAmt1 node can be: XOR( LOAD(C, i16), MASK) or it can be: XOR( C, + // MASK) + SDValue ShAmt1Load = ShAmt1.getOperand(0); + if (ShAmt1Load.getOpcode() == ISD::TRUNCATE) + ShAmt1Load = ShAmt1Load.getOperand(0); + + // Check for operands that were loaded from memory and promoted to integer + if ((Op0.getOpcode() == ISD::LOAD && + (Op1.getOpcode() == ISD::SHL || Op1.getOpcode() == ISD::SRL) && + XorMaskConst && OneConst && (OneConst->getSExtValue() == 1) && + (Op1Load.getOpcode() == ISD::LOAD))) { + // Op0 node is LOAD( X, i16) + LoadSDNode *Op0Ld = cast<LoadSDNode>(Op0); + EVT Op0VT = Op0Ld->getMemoryVT(); + + // ShAmt0 node can be: LOAD(C, zext i16) or it can be CopyFromReg(C) + EVT ShAmt0VT; + if (ShAmt0.getOpcode() == ISD::LOAD) { + LoadSDNode *ShAmt0Ld = cast<LoadSDNode>(ShAmt0); + ShAmt0VT = ShAmt0Ld->getMemoryVT(); + } else { + ShAmt0VT = ShAmt0.getValueType(); + } + // Op1Load is LOAD(Y, i16) + LoadSDNode *Op1Ld = cast<LoadSDNode>(Op1Load); + EVT Op1VT = Op1Ld->getMemoryVT(); + + // ShAmt1Load node is LOAD(C, zext i16) or it can be CopyFromReg(C) + EVT ShAmt1VT; + if (ShAmt1Load.getOpcode() == ISD::LOAD) { + LoadSDNode *ShAmt1Ld = cast<LoadSDNode>(ShAmt1Load); + ShAmt1VT = ShAmt1Ld->getMemoryVT(); + } else { + ShAmt1VT = ShAmt1Load.getValueType(); + } + + if ((Op0VT == Op1VT) && (ShAmt0VT == ShAmt1VT) && + (ShAmt0 == ShAmt1Load) && (Op0VT == MVT::i16 || Op0VT == MVT::i8)) { + unsigned Op0Bits = Op0VT.getSizeInBits(); + if (XorMaskConst->getSExtValue() == (Op0Bits - 1)) { + return getShiftDoubleNode(N, DAG, Opc, VT, Op0VT, Op0, Op1Load, + ShAmt0); + } } } } Index: test/CodeGen/X86/shift-double-x86_64.ll =================================================================== --- test/CodeGen/X86/shift-double-x86_64.ll +++ test/CodeGen/X86/shift-double-x86_64.ll @@ -107,3 +107,273 @@ %sh = or i64 %sh_lo, %sh_hi ret i64 %sh } + +;------------------------------------------------------------------------------------- +; double 
shift left pattern +;uint_t shld(uint_t a, uint_t b, int shift) +;{ +; return (a << shift) | (b >> (sizeof(uint_t)*8 - shift)); +;} + +define i64 @shld64_sh64(i64 %a, i64 %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld64_sh64: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + + %shl = shl i64 %a, %bits + %sub = sub i64 64, %bits + %shr = lshr i64 %b, %sub + %or = or i64 %shr, %shl + ret i64 %or +} + +define i64 @shld64_sh32(i64 %a, i64 %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld64_sh32: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + + %sh_prom = zext i32 %bits to i64 + %shl = shl i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shr = lshr i64 %b, %sub + %or = or i64 %shr, %shl + ret i64 %or +} + +define i64 @shld64_sh16(i64 %a, i64 %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld64_sh16: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + + %sh_prom = zext i16 %bits to i64 + %shl = shl i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shr = lshr i64 %b, %sub + %or = or i64 %shr, %shl + ret i64 %or +} + +define i64 @shld64_sh8(i64 %a, i64 %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld64_sh8: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + + %sh_prom = zext i8 %bits to i64 + %shl = shl i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shr = lshr i64 %b, %sub + %or = or i64 %shr, %shl + ret i64 %or +} + +;------------------------------------------------------------------------------------- +; double shift right pattern +;uint_t shrd(uint_t a, uint_t b, int shift) +;{ +; return (a >> shift) | (b << ( sizeof(uint_t)*8 - shift)); +;} + + +define i64 @shrd64_sh64(i64 %a, i64 %b, i64 %bits) nounwind { +; CHECK-LABEL: shrd64_sh64: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; 
CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %shr = lshr i64 %a, %bits + %sub = sub i64 64, %bits + %shl = shl i64 %b, %sub + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @shrd64_sh32(i64 %a, i64 %b, i32 %bits) nounwind { +; CHECK-LABEL: shrd64_sh32: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i32 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shl = shl i64 %b, %sub + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @shrd64_sh16(i64 %a, i64 %b, i16 zeroext %bits) nounwind { +; CHECK-LABEL: shrd64_sh16: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i16 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shl = shl i64 %b, %sub + %or = or i64 %shl, %shr + ret i64 %or +} + +define i64 @shrd64_sh8(i64 %a, i64 %b, i8 zeroext %bits) nounwind { +; CHECK-LABEL: shrd64_sh8: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i8 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %sub = sub nsw i64 64, %sh_prom + %shl = shl i64 %b, %sub + %or = or i64 %shl, %shr + ret i64 %or +} + +;------------------------------------------------------------------------------------- +; double shift left with xor pattern +;uint64_t shldx(uint64_t a, uint64_t b, shift_t bits) +;{ +; return (a << bits) | ((b >> 1) >> (bits ^ (sizeof(a)*8 - 1))); +;} + +define i64 @shld64x_sh64(i64 %a, i64 %b, i64 %bits) nounwind { +; CHECK-LABEL: shld64x_sh64: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %shl = shl i64 %a, %bits + %shr = lshr i64 %b, 1 + %xor = xor i64 %bits, 63 + %shr1 = lshr i64 %shr, %xor + %or = or i64 %shr1, %shl + ret i64 %or +} + +define i64 @shld64x_sh32(i64 %a, i64 %b, i32 %bits) nounwind { +; CHECK-LABEL: 
shld64x_sh32: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i32 %bits to i64 + %shl = shl i64 %a, %sh_prom + %shr = lshr i64 %b, 1 + %xor = xor i64 %sh_prom, 63 + %shr1 = lshr i64 %shr, %xor + %or = or i64 %shr1, %shl + ret i64 %or +} + +define i64 @shld64x_sh16(i64 %a, i64 %b, i16 zeroext %bits) nounwind { +; CHECK-LABEL: shld64x_sh16: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i16 %bits to i64 + %shl = shl i64 %a, %sh_prom + %shr = lshr i64 %b, 1 + %xor0 = xor i16 %bits, 63 + %xor = zext i16 %xor0 to i64 + %shr2 = lshr i64 %shr, %xor + %or = or i64 %shr2, %shl + ret i64 %or +} + +define i64 @shld64x_sh8(i64 %a, i64 %b, i8 zeroext %bits) nounwind { +; CHECK-LABEL: shld64x_sh8: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shldq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i8 %bits to i64 + %shl = shl i64 %a, %sh_prom + %shr = lshr i64 %b, 1 + %xor0 = xor i8 %bits, 63 + %xor = zext i8 %xor0 to i64 + %shr2 = lshr i64 %shr, %xor + %or = or i64 %shr2, %shl + ret i64 %or +} + +;------------------------------------------------------------------------------------- +; double shift right with xor pattern +;uint64_t shrdx(uint64_t a, uint64_t b, shift_t bits) +;{ +; return (a >> bits) | ((b << 1) << (bits ^ (sizeof(a)*8 - 1))); +;} + +define i64 @shrd64x_sh64(i64 %a, i64 %b, i64 %bits) nounwind { +; CHECK-LABEL: shrd64x_sh64: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %shr = lshr i64 %a, %bits + %shl = shl i64 %b, 1 + %xor = xor i64 %bits, 63 + %shl1 = shl i64 %shl, %xor + %or = or i64 %shl1, %shr + ret i64 %or +} + +define i64 @shrd64x_sh32(i64 %a, i64 %b, i32 %bits) nounwind { +; CHECK-LABEL: shrd64x_sh32: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq 
+ %sh_prom = zext i32 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %shl = shl i64 %b, 1 + %xor = xor i64 %sh_prom, 63 + %shl1 = shl i64 %shl, %xor + %or = or i64 %shl1, %shr + ret i64 %or +} + +define i64 @shrd64x_sh16(i64 %a, i64 %b, i16 zeroext %bits) nounwind { +; CHECK-LABEL: shrd64x_sh16: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i16 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %shl = shl i64 %b, 1 + %xor0 = xor i16 %bits, 63 + %xor = zext i16 %xor0 to i64 + %shl2 = shl i64 %shl, %xor + %or = or i64 %shl2, %shr + ret i64 %or +} + +define i64 @shrd64x_sh8(i64 %a, i64 %b, i8 zeroext %bits) nounwind { +; CHECK-LABEL: shrd64x_sh8: +; CHECK: movl %edx, %ecx +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK: retq + %sh_prom = zext i8 %bits to i64 + %shr = lshr i64 %a, %sh_prom + %shl = shl i64 %b, 1 + %xor0 = xor i8 %bits, 63 + %xor = zext i8 %xor0 to i64 + %shl2 = shl i64 %shl, %xor + %or = or i64 %shl2, %shr + ret i64 %or +} + Index: test/CodeGen/X86/shift-double.ll =================================================================== --- test/CodeGen/X86/shift-double.ll +++ test/CodeGen/X86/shift-double.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 ; Shift i64 integers on 32-bit target @@ -310,3 +311,1310 @@ %sh = or i32 %sh_lo, %sh_hi ret i32 %sh } + + +;------------------------------------------------------------------------------------- +; double shift left pattern +;uint_t shld(uint_t a, uint_t b, int shift) +;{ +; return (a << shift) | (b >> (sizeof(uint_t)*8 - shift)); +;} + +define i32 @shld32_sh64(i32 %a, i32 %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld32_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %a, %sh_prom + %sub = sub i64 32, %bits + %sh_prom1 = trunc i64 %sub to i32 + %shr = lshr i32 %b, %sh_prom1 + %or = or i32 %shr, %shl + ret i32 %or +} + +define i32 @shld32_sh32(i32 %a, i32 %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld32_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %shl = shl i32 %a, %bits + %sub = sub i32 32, %bits + %shr = lshr i32 %b, %sub + %or = or i32 %shr, %shl + ret i32 %or +} + +define i32 @shld32_sh16(i32 %a, i32 %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld32_sh16: +; CHECK: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %bits to i32 + %shl = shl i32 %a, %conv + %sub = sub nsw i32 32, %conv + %shr = lshr i32 %b, %sub + %or = or i32 %shr, %shl + ret i32 %or +} + +define i32 @shld32_sh8(i32 %a, i32 %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld32_sh8: +; CHECK: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + 
%conv = zext i8 %bits to i32 + %shl = shl i32 %a, %conv + %sub = sub nsw i32 32, %conv + %shr = lshr i32 %b, %sub + %or = or i32 %shr, %shl + ret i32 %or +} + +define zeroext i16 @shld16_sh64(i16 zeroext %a, i16 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld16_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %conv, %sh_prom + %conv1 = zext i16 %b to i32 + %sub = sub i64 16, %bits + %sh_prom2 = trunc i64 %sub to i32 + %shr = lshr i32 %conv1, %sh_prom2 + %or = or i32 %shr, %shl + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +} + +define zeroext i16 @shld16_sh32(i16 zeroext %a, i16 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld16_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %shl = shl i32 %conv, %bits + %conv1 = zext i16 %b to i32 + %sub = sub i32 16, %bits + %shr = lshr i32 %conv1, %sub + %or = or i32 %shr, %shl + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +} + +define zeroext i16 @shld16_sh16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld16_sh16: +; CHECK: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; 
CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i16 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %sub = sub nsw i32 16, %conv1 + %shr = lshr i32 %conv2, %sub + %or = or i32 %shr, %shl + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i16 @shld16_sh8(i16 zeroext %a, i16 zeroext %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld16_sh8: +; CHECK: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i8 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %sub = sub nsw i32 16, %conv1 + %shr = lshr i32 %conv2, %sub + %or = or i32 %shr, %shl + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i8 @shld8_sh64(i8 zeroext %a, i8 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld8_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8_sh64: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %conv, %sh_prom + %conv1 = zext i8 %b to i32 + %sub = sub i64 8, %bits + %sh_prom2 = trunc i64 %sub to i32 + %shr = lshr i32 %conv1, %sh_prom2 + %or = or i32 %shr, %shl + %conv3 = trunc i32 %or to i8 + ret i8 %conv3 +} + +define zeroext i8 @shld8_sh32(i8 zeroext %a, i8 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld8_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; 
CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8_sh32: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %shl = shl i32 %conv, %bits + %conv1 = zext i8 %b to i32 + %sub = sub i32 8, %bits + %shr = lshr i32 %conv1, %sub + %or = or i32 %shr, %shl + %conv3 = trunc i32 %or to i8 + ret i8 %conv3 +} + +define zeroext i8 @shld8_sh16(i8 zeroext %a, i8 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld8_sh16: +; CHECK: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8_sh16: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i16 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %sub = sub nsw i32 8, %conv1 + %shr = lshr i32 %conv2, %sub + %or = or i32 %shr, %shl + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +define zeroext i8 @shld8_sh8(i8 zeroext %a, i8 zeroext %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld8_sh8: +; CHECK: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8_sh8: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i8 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %sub = sub nsw i32 
8, %conv1 + %shr = lshr i32 %conv2, %sub + %or = or i32 %shr, %shl + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +;------------------------------------------------------------------------------------- +; double shift right pattern +;uint_t shrd(uint_t a, uint_t b, int shift) +;{ +; return (a >> shift) | (b << ( sizeof(uint_t)*8 - shift)); +;} + +define i32 @shrd32_sh64(i32 %a, i32 %b, i64 %bits) nounwind { + +; CHECK-LABEL: shrd32_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd32_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %sh_prom = trunc i64 %bits to i32 + %shr = lshr i32 %a, %sh_prom + %sub = sub i64 32, %bits + %sh_prom1 = trunc i64 %sub to i32 + %shl = shl i32 %b, %sh_prom1 + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @shrd32_sh32(i32 %a, i32 %b, i32 %bits) nounwind { + +; CHECK-LABEL: shrd32_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd32_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %shr = lshr i32 %a, %bits + %sub = sub i32 32, %bits + %shl = shl i32 %b, %sub + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @shrd32_sh16(i32 %a, i32 %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shrd32_sh16: +; CHECK: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd32_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %bits to i32 + %shr = 
lshr i32 %a, %conv + %sub = sub nsw i32 32, %conv + %shl = shl i32 %b, %sub + %or = or i32 %shl, %shr + ret i32 %or +} + +define i32 @shrd32_sh8(i32 %a, i32 %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shrd32_sh8: +; CHECK: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd32_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %bits to i32 + %shr = lshr i32 %a, %conv + %sub = sub nsw i32 32, %conv + %shl = shl i32 %b, %sub + %or = or i32 %shl, %shr + ret i32 %or +} + +define zeroext i16 @shrd16_sh64(i16 zeroext %a, i16 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shrd16_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shrdw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shrd16_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shr = lshr i32 %conv, %sh_prom + %conv1 = zext i16 %b to i32 + %sub = sub i64 16, %bits + %sh_prom2 = trunc i64 %sub to i32 + %shl = shl i32 %conv1, %sh_prom2 + %or = or i32 %shl, %shr + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +} + +define zeroext i16 @shrd16_sh32(i16 zeroext %a, i16 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shrd16_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shrdw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shrd16_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %shr = lshr i32 %conv, %bits + %conv1 = zext i16 %b to i32 + 
%sub = sub i32 16, %bits + %shl = shl i32 %conv1, %sub + %or = or i32 %shl, %shr + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +} + +define zeroext i16 @shrd16_sh16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shrd16_sh16: +; CHECK: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shrdw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shrd16_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i16 %bits to i32 + %shr = lshr i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %sub = sub nsw i32 16, %conv1 + %shl = shl i32 %conv2, %sub + %or = or i32 %shl, %shr + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i16 @shrd16_sh8(i16 zeroext %a, i16 zeroext %b, i8 zeroext %bits) nounwind { +; CHECK-LABEL: shrd16_sh8: +; CHECK: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shrdw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shrd16_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shrdw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i8 %bits to i32 + %shr = lshr i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %sub = sub nsw i32 16, %conv1 + %shl = shl i32 %conv2, %sub + %or = or i32 %shl, %shr + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i8 @shrd8_sh64(i8 zeroext %a, i8 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shrd8_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK-NEXT: shrl $24, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd8_sh64: +; CHECK64: shll $24, %edi +; CHECK64-NEXT: movl 
%edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: shrl $24, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shr = lshr i32 %conv, %sh_prom + %conv1 = zext i8 %b to i32 + %sub = sub i64 8, %bits + %sh_prom2 = trunc i64 %sub to i32 + %shl = shl i32 %conv1, %sh_prom2 + %or = or i32 %shl, %shr + %conv3 = trunc i32 %or to i8 + ret i8 %conv3 +} + +define zeroext i8 @shrd8_sh32(i8 zeroext %a, i8 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shrd8_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK-NEXT: shrl $24, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd8_sh32: +; CHECK64: shll $24, %edi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: shrl $24, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %shr = lshr i32 %conv, %bits + %conv1 = zext i8 %b to i32 + %sub = sub i32 8, %bits + %shl = shl i32 %conv1, %sub + %or = or i32 %shl, %shr + %conv3 = trunc i32 %or to i8 + ret i8 %conv3 +} + +define zeroext i8 @shrd8_sh16(i8 zeroext %a, i8 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shrd8_sh16: +; CHECK: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK-NEXT: shrl $24, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd8_sh16: +; CHECK64: shll $24, %edi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: shrl $24, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i16 %bits to i32 + %shr = lshr i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %sub = sub nsw i32 8, %conv1 + %shl = shl i32 %conv2, %sub + %or = or i32 
%shl, %shr + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +define zeroext i8 @shrd8_sh8(i8 zeroext %a, i8 zeroext %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shrd8_sh8: +; CHECK: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax +; CHECK-NEXT: shrl $24, %eax +; CHECK: retl + +; CHECK64-LABEL: shrd8_sh8: +; CHECK64: shll $24, %edi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shrdl %cl, %esi, %edi +; CHECK64-NEXT: shrl $24, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i8 %bits to i32 + %shr = lshr i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %sub = sub nsw i32 8, %conv1 + %shl = shl i32 %conv2, %sub + %or = or i32 %shl, %shr + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +;------------------------------------------------------------------------------------- +; double shift left with xor pattern +;uint64_t shldx(uint64_t a, uint64_t b, shift_t bits) +;{ +; return (a << bits) | ((b >> 1) >> (bits ^ (sizeof(a)*8 - 1))); +;} + + +define i32 @shld32x_sh64(i32 %a, i32 %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld32x_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32x_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %a, %sh_prom + %shr = lshr i32 %b, 1 + %sh_prom1 = xor i32 %sh_prom, 31 + %shr2 = lshr i32 %shr, %sh_prom1 + %or = or i32 %shr2, %shl + ret i32 %or +} + +define i32 @shld32x_sh32(i32 %a, i32 %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld32x_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), 
%eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32x_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %shl = shl i32 %a, %bits + %shr = lshr i32 %b, 1 + %xor0 = xor i32 %bits, 31 + %shr1 = lshr i32 %shr, %xor0 + %or = or i32 %shr1, %shl + ret i32 %or +} + +define i32 @shld32x_sh16(i32 %a, i32 %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld32x_sh16: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32x_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %bits to i32 + %shl = shl i32 %a, %conv + %shr = lshr i32 %b, 1 + %xor0 = xor i16 %bits, 31 + %sh_prom = zext i16 %xor0 to i32 + %shr2 = lshr i32 %shr, %sh_prom + %or = or i32 %shr2, %shl + ret i32 %or +} + +define i32 @shld32x_sh8(i32 %a, i32 %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld32x_sh8: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld32x_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %bits to i32 + %shl = shl i32 %a, %conv + %shr = lshr i32 %b, 1 + %xor0 = xor i8 %bits, 31 + %sh_prom = zext i8 %xor0 to i32 + %shr2 = lshr i32 %shr, %sh_prom + %or = or i32 %shr2, %shl + ret i32 %or +} + +define zeroext i16 @shld16x_sh64(i16 zeroext %a, i16 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld16x_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: 
shld16x_sh64: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %conv, %sh_prom + %conv1 = zext i16 %b to i32 + %lshr0 = lshr i32 %conv1, 1 + %sh_prom2 = xor i32 %sh_prom, 15 + %shr3 = lshr i32 %lshr0, %sh_prom2 + %or = or i32 %shr3, %shl + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i16 @shld16x_sh32(i16 zeroext %a, i16 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld16x_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16x_sh32: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %shl = shl i32 %conv, %bits + %conv1 = zext i16 %b to i32 + %lshr0 = lshr i32 %conv1, 1 + %xor1 = xor i32 %bits, 15 + %shr3 = lshr i32 %lshr0, %xor1 + %or = or i32 %shr3, %shl + %conv4 = trunc i32 %or to i16 + ret i16 %conv4 +} + +define zeroext i16 @shld16x_sh16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld16x_sh16: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16x_sh16: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i16 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %lshr0 = lshr i32 %conv2, 1 + %xor1 = xor i16 %bits, 15 + %sh_prom = zext i16 %xor1 to i32 + %shr4 = lshr i32 %lshr0, %sh_prom + %or = or i32 %shr4, %shl + %conv5 = trunc i32 %or to i16 + ret i16 %conv5 +} + +define zeroext i16 @shld16x_sh8(i16 zeroext %a, i16 
zeroext %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld16x_sh8: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shldw %cl, %dx, %ax +; CHECK: retl + +; CHECK64-LABEL: shld16x_sh8: +; CHECK64: movl %edx, %ecx +; CHECK64-NEXT: shldw %cl, %si, %di +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i16 %a to i32 + %conv1 = zext i8 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i16 %b to i32 + %lshr0 = lshr i32 %conv2, 1 + %xor1 = xor i8 %bits, 15 + %sh_prom = zext i8 %xor1 to i32 + %shr4 = lshr i32 %lshr0, %sh_prom + %or = or i32 %shr4, %shl + %conv5 = trunc i32 %or to i16 + ret i16 %conv5 +} + +define zeroext i8 @shld8x_sh64(i8 zeroext %a, i8 zeroext %b, i64 %bits) nounwind { + +; CHECK-LABEL: shld8x_sh64: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8x_sh64: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %sh_prom = trunc i64 %bits to i32 + %shl = shl i32 %conv, %sh_prom + %conv1 = zext i8 %b to i32 + %lshr0 = lshr i32 %conv1, 1 + %sh_prom2 = xor i32 %sh_prom, 7 + %shr3 = lshr i32 %lshr0, %sh_prom2 + %or = or i32 %shr3, %shl + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +define zeroext i8 @shld8x_sh32(i8 zeroext %a, i8 zeroext %b, i32 %bits) nounwind { + +; CHECK-LABEL: shld8x_sh32: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8x_sh32: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi 
+; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %shl = shl i32 %conv, %bits + %conv1 = zext i8 %b to i32 + %lshr0 = lshr i32 %conv1, 1 + %xor1 = xor i32 %bits, 7 + %shr3 = lshr i32 %lshr0, %xor1 + %or = or i32 %shr3, %shl + %conv4 = trunc i32 %or to i8 + ret i8 %conv4 +} + +define zeroext i8 @shld8x_sh16(i8 zeroext %a, i8 zeroext %b, i16 zeroext %bits) nounwind { + +; CHECK-LABEL: shld8x_sh16: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8x_sh16: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i16 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %lshr0 = lshr i32 %conv2, 1 + %xor1 = xor i16 %bits, 7 + %sh_prom = zext i16 %xor1 to i32 + %shr4 = lshr i32 %lshr0, %sh_prom + %or = or i32 %shr4, %shl + %conv5 = trunc i32 %or to i8 + ret i8 %conv5 +} + +define zeroext i8 @shld8x_sh8(i8 zeroext %a, i8 zeroext %b, i8 zeroext %bits) nounwind { + +; CHECK-LABEL: shld8x_sh8: +; CHECK: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shldl %cl, %edx, %eax +; CHECK: retl + +; CHECK64-LABEL: shld8x_sh8: +; CHECK64: shll $24, %esi +; CHECK64-NEXT: movl %edx, %ecx +; CHECK64-NEXT: shldl %cl, %esi, %edi +; CHECK64-NEXT: movl %edi, %eax +; CHECK64: retq + + %conv = zext i8 %a to i32 + %conv1 = zext i8 %bits to i32 + %shl = shl i32 %conv, %conv1 + %conv2 = zext i8 %b to i32 + %lshr0 = lshr i32 %conv2, 1 + %xor1 = xor i8 %bits, 7 + %sh_prom = zext i8 %xor1 to i32 + %shr4 = lshr i32 %lshr0, %sh_prom + %or = or i32 %shr4, %shl + %conv5 = trunc i32 %or to i8 + ret i8 %conv5 +} + 
+;-------------------------------------------------------------------------------------
+; double shift right with xor pattern
+;uint64_t shrdx(uint64_t a, uint64_t b, shift_t bits)
+;{
+; return (a >> bits) | ((b << 1) << (bits ^ (sizeof(a)*8 - 1)));
+;}
+;
+; NOTE(review): the tests below differ only in the result width (i32/i16/i8)
+; and in the type of %bits (i64/i32/i16/i8). Per the CHECK lines, each is
+; expected to fold into a single shrdl/shrdw-based sequence rather than the
+; literal lshr/shl/or expansion.
+
+; i32 result, i64 shift amount (truncated to i32 before use); expect one shrdl.
+define i32 @shrd32x_sh64(i32 %a, i32 %b, i64 %bits) nounwind {
+
+; CHECK-LABEL: shrd32x_sh64:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: retl
+
+; CHECK64-LABEL: shrd32x_sh64:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %sh_prom = trunc i64 %bits to i32
+  %shr = lshr i32 %a, %sh_prom
+  %shl = shl i32 %b, 1
+  %sh_prom1 = xor i32 %sh_prom, 31
+  %shl2 = shl i32 %shl, %sh_prom1
+  %or = or i32 %shl2, %shr
+  ret i32 %or
+}
+
+; i32 result, i32 shift amount; expect one shrdl.
+define i32 @shrd32x_sh32(i32 %a, i32 %b, i32 %bits) nounwind {
+
+; CHECK-LABEL: shrd32x_sh32:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: retl
+
+; CHECK64-LABEL: shrd32x_sh32:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %shr = lshr i32 %a, %bits
+  %shl = shl i32 %b, 1
+  %xor0 = xor i32 %bits, 31
+  %shl1 = shl i32 %shl, %xor0
+  %or = or i32 %shl1, %shr
+  ret i32 %or
+}
+
+; i32 result, i16 shift amount; the xor is done in i16 then zero-extended.
+define i32 @shrd32x_sh16(i32 %a, i32 %b, i16 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd32x_sh16:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: retl
+
+; CHECK64-LABEL: shrd32x_sh16:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i16 %bits to i32
+  %shr = lshr i32 %a, %conv
+  %shl = shl i32 %b, 1
+  %xor0 = xor i16 %bits, 31
+  %sh_prom = zext i16 %xor0 to i32
+  %shl2 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl2, %shr
+  ret i32 %or
+}
+
+; i32 result, i8 shift amount; the xor is done in i8 then zero-extended.
+define i32 @shrd32x_sh8(i32 %a, i32 %b, i8 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd32x_sh8:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: retl
+
+; CHECK64-LABEL: shrd32x_sh8:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i8 %bits to i32
+  %shr = lshr i32 %a, %conv
+  %shl = shl i32 %b, 1
+  %xor0 = xor i8 %bits, 31
+  %sh_prom = zext i8 %xor0 to i32
+  %shl2 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl2, %shr
+  ret i32 %or
+}
+
+; i16 result (operands zero-extended to i32, xor mask 15); expect one shrdw.
+define zeroext i16 @shrd16x_sh64(i16 zeroext %a, i16 zeroext %b, i64 %bits) nounwind {
+
+; CHECK-LABEL: shrd16x_sh64:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shrdw %cl, %dx, %ax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd16x_sh64:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdw %cl, %si, %di
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i16 %a to i32
+  %sh_prom = trunc i64 %bits to i32
+  %shr = lshr i32 %conv, %sh_prom
+  %conv1 = zext i16 %b to i32
+  %shl = shl nuw nsw i32 %conv1, 1
+  %sh_prom2 = xor i32 %sh_prom, 15
+  %shl3 = shl i32 %shl, %sh_prom2
+  %or = or i32 %shl3, %shr
+  %conv4 = trunc i32 %or to i16
+  ret i16 %conv4
+}
+
+; i16 result, i32 shift amount; expect one shrdw.
+define zeroext i16 @shrd16x_sh32(i16 zeroext %a, i16 zeroext %b, i32 %bits) nounwind {
+
+; CHECK-LABEL: shrd16x_sh32:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shrdw %cl, %dx, %ax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd16x_sh32:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdw %cl, %si, %di
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i16 %a to i32
+  %shr = lshr i32 %conv, %bits
+  %conv1 = zext i16 %b to i32
+  %shl = shl nuw nsw i32 %conv1, 1
+  %xor0 = xor i32 %bits, 15
+  %shl3 = shl i32 %shl, %xor0
+  %or = or i32 %shl3, %shr
+  %conv4 = trunc i32 %or to i16
+  ret i16 %conv4
+}
+
+; i16 result, i16 shift amount; expect one shrdw.
+define zeroext i16 @shrd16x_sh16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd16x_sh16:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shrdw %cl, %dx, %ax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd16x_sh16:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdw %cl, %si, %di
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i16 %a to i32
+  %conv1 = zext i16 %bits to i32
+  %shr = lshr i32 %conv, %conv1
+  %conv2 = zext i16 %b to i32
+  %shl = shl nuw nsw i32 %conv2, 1
+  %xor0 = xor i16 %bits, 15
+  %sh_prom = zext i16 %xor0 to i32
+  %shl4 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl4, %shr
+  %conv5 = trunc i32 %or to i16
+  ret i16 %conv5
+}
+
+; i16 result, i8 shift amount; expect one shrdw.
+define zeroext i16 @shrd16x_sh8(i16 zeroext %a, i16 zeroext %b, i8 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd16x_sh8:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shrdw %cl, %dx, %ax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd16x_sh8:
+; CHECK64: movl %edx, %ecx
+; CHECK64-NEXT: shrdw %cl, %si, %di
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i16 %a to i32
+  %conv1 = zext i8 %bits to i32
+  %shr = lshr i32 %conv, %conv1
+  %conv2 = zext i16 %b to i32
+  %shl = shl nuw nsw i32 %conv2, 1
+  %xor0 = xor i8 %bits, 15
+  %sh_prom = zext i8 %xor0 to i32
+  %shl4 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl4, %shr
+  %conv5 = trunc i32 %or to i16
+  ret i16 %conv5
+}
+
+; i8 result (xor mask 7): per the CHECKs this is widened — shll $24 before a
+; 32-bit shrdl and shrl $24 after — presumably because there is no byte-sized
+; shrd form; matches the i8 path of getShiftDoubleNode.
+define zeroext i8 @shrd8x_sh64(i8 zeroext %a, i8 zeroext %b, i64 %bits) nounwind {
+
+; CHECK-LABEL: shrd8x_sh64:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd8x_sh64:
+; CHECK64: shll $24, %edi
+; CHECK64-NEXT: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: shrl $24, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i8 %a to i32
+  %sh_prom = trunc i64 %bits to i32
+  %shr = lshr i32 %conv, %sh_prom
+  %conv1 = zext i8 %b to i32
+  %shl = shl nuw nsw i32 %conv1, 1
+  %sh_prom2 = xor i32 %sh_prom, 7
+  %shl3 = shl i32 %shl, %sh_prom2
+  %or = or i32 %shl3, %shr
+  %conv4 = trunc i32 %or to i8
+  ret i8 %conv4
+}
+
+; i8 result, i32 shift amount; widened shrdl sequence as above.
+define zeroext i8 @shrd8x_sh32(i8 zeroext %a, i8 zeroext %b, i32 %bits) nounwind {
+
+; CHECK-LABEL: shrd8x_sh32:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd8x_sh32:
+; CHECK64: shll $24, %edi
+; CHECK64-NEXT: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: shrl $24, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i8 %a to i32
+  %shr = lshr i32 %conv, %bits
+  %conv1 = zext i8 %b to i32
+  %shl = shl nuw nsw i32 %conv1, 1
+  %xor0 = xor i32 %bits, 7
+  %shl3 = shl i32 %shl, %xor0
+  %or = or i32 %shl3, %shr
+  %conv4 = trunc i32 %or to i8
+  ret i8 %conv4
+}
+
+; i8 result, i16 shift amount; widened shrdl sequence as above.
+define zeroext i8 @shrd8x_sh16(i8 zeroext %a, i8 zeroext %b, i16 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd8x_sh16:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd8x_sh16:
+; CHECK64: shll $24, %edi
+; CHECK64-NEXT: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: shrl $24, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i8 %a to i32
+  %conv1 = zext i16 %bits to i32
+  %shr = lshr i32 %conv, %conv1
+  %conv2 = zext i8 %b to i32
+  %shl = shl nuw nsw i32 %conv2, 1
+  %xor0 = xor i16 %bits, 7
+  %sh_prom = zext i16 %xor0 to i32
+  %shl4 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl4, %shr
+  %conv5 = trunc i32 %or to i8
+  ret i8 %conv5
+}
+
+; i8 result, i8 shift amount; widened shrdl sequence as above.
+define zeroext i8 @shrd8x_sh8(i8 zeroext %a, i8 zeroext %b, i8 zeroext %bits) nounwind {
+
+; CHECK-LABEL: shrd8x_sh8:
+; CHECK: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: shrdl %cl, %edx, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK: retl
+
+; CHECK64-LABEL: shrd8x_sh8:
+; CHECK64: shll $24, %edi
+; CHECK64-NEXT: movl %edx, %ecx
+; CHECK64-NEXT: shrdl %cl, %esi, %edi
+; CHECK64-NEXT: shrl $24, %edi
+; CHECK64-NEXT: movl %edi, %eax
+; CHECK64: retq
+
+  %conv = zext i8 %a to i32
+  %conv1 = zext i8 %bits to i32
+  %shr = lshr i32 %conv, %conv1
+  %conv2 = zext i8 %b to i32
+  %shl = shl nuw nsw i32 %conv2, 1
+  %xor0 = xor i8 %bits, 7
+  %sh_prom = zext i8 %xor0 to i32
+  %shl4 = shl i32 %shl, %sh_prom
+  %or = or i32 %shl4, %shr
+  %conv5 = trunc i32 %or to i8
+  ret i8 %conv5
+}