Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -138,6 +138,7 @@ // Match BITREVERSE to customized fast code sequence in the td file. setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td @@ -4522,3 +4522,122 @@ def : Pat<(i32 (bitreverse i32:$A)), (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; + +// Fast 64-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]): +// Apply the same byte reverse algorithm mentioned above for the fast 32-bit +// reverse to both the high 32 bit and low 32 bit of the 64 bit value. And +// then OR them together to get the final result. 
+def MaskValues64 { // Each MaskValues.* value in sub_32 of an i64; other bits IMPLICIT_DEF
+  dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32));
+  dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32));
+  dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32));
+  dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32));
+  dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32));
+  dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32));
+} // NOTE(review): MaskValues.* is defined outside this hunk; presumably the 32-bit mask halves - confirm
+
+def DWMaskValues { // 64-bit masks: SLDI by 32 (undef bits drop out), then ORIS8/ORI8 fill the low word
+  dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555);
+  dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA);
+  dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333);
+  dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC);
+  dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F);
+  dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0);
+}
+
+def DWShift1 { // Operands of the 1-bit swap: n >> 1 and n << 1
+  dag Right = (RLDICL $A, 63, 1);
+  dag Left = (RLDICR $A, 1, 62);
+}
+
+def DWSwap1 { // 1-bit swap: ((n >> 1) & Lo1) | ((n << 1) & Hi1)
+  dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1),
+                 (AND8 DWShift1.Left, DWMaskValues.Hi1));
+}
+
+def DWShift2 { // Operands of the 2-bit swap, applied to the 1-bit-swapped value
+  dag Right = (RLDICL DWSwap1.Bit, 62, 2);
+  dag Left = (RLDICR DWSwap1.Bit, 2, 61);
+}
+
+def DWSwap2 { // 2-bit swap: ((n >> 2) & Lo2) | ((n << 2) & Hi2)
+  dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2),
+                  (AND8 DWShift2.Left, DWMaskValues.Hi2));
+}
+
+def DWShift4 { // Operands of the 4-bit swap, applied to the 2-bit-swapped value
+  dag Right = (RLDICL DWSwap2.Bits, 60, 4);
+  dag Left = (RLDICR DWSwap2.Bits, 4, 59);
+}
+
+def DWSwap4 { // 4-bit swap: ((n >> 4) & Lo4) | ((n << 4) & Hi4)
+  dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4),
+                  (AND8 DWShift4.Left, DWMaskValues.Hi4));
+}
+
+// Bit swap is done, now start byte swap.
+def DWExtractLo32 { // Low word of the bit-swapped value
+  dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32));
+}
+
+def DWRotateLo32 { // Low word rotated left by 24
+  dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31);
+}
+
+def DWLo32RotateInsertByte3 { // Insert bits 8-15 of (low word rotated left by 8)
+  dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15);
+}
+
+// Low 32 bits byte-reversed, still in the low 32-bit position
+def DWLo32RotateInsertByte1 {
+  dag Left =
+    (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31);
+}
+
+def ExtendLo32 { // Widen to i64; the bits above sub_32 are IMPLICIT_DEF
+  dag To64Bit =
+    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                        DWLo32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftHi32 { // SRDI DWSwap4.Bits, 32
+  dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32);
+}
+
+def DWExtractHi32 { // High word of the bit-swapped value, taken from the low position
+  dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32));
+}
+
+def DWRotateHi32 { // High word rotated left by 24
+  dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31);
+}
+
+def DWHi32RotateInsertByte3 { // Insert bits 8-15 of (high word rotated left by 8)
+  dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15);
+}
+
+// High 32 bits byte-reversed, already in the low 32-bit position where they belong
+def DWHi32RotateInsertByte1 {
+  dag Left =
+    (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31);
+}
+
+def ExtendHi32 { // NOTE(review): bits above sub_32 are IMPLICIT_DEF yet feed OR8; presumably zero after RLWINM - confirm
+  dag To64Bit =
+    (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                        DWHi32RotateInsertByte1.Left, sub_32));
+}
+
+def DWShiftLo32 { // SLDI ExtendLo32.To64Bit, 32
+  dag ToHi32 = (RLDICR ExtendLo32.To64Bit, 32, 31);
+}
+// A full reverse also exchanges the words: reversed low word on top, reversed high word below.
+def : Pat<(i64 (bitreverse i64:$A)),
+  (OR8 DWShiftLo32.ToHi32, ExtendHi32.To64Bit)>;
Index: llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
+++ llvm/trunk/test/CodeGen/PowerPC/pr33093.ll
@@ -65,3 +65,101 @@
   %or22 = or i32 %or19, %shl18
   ret i32 %or22
 }
+
+define i64 @ReverseBits64(i64 %n) {
+; CHECK-LABEL: ReverseBits64:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lis 4, -21846
+; CHECK-NEXT: lis 5, 21845
+; CHECK-NEXT: lis 6, -13108
+; CHECK-NEXT: lis 7, 13107
+; CHECK-NEXT: sldi 8, 3, 1
+; CHECK-NEXT: rldicl 3, 3, 63, 1
+; CHECK-NEXT: ori 4, 4, 43690
+; CHECK-NEXT: ori 5, 5, 21845
+; CHECK-NEXT: ori 6, 6, 52428
+; CHECK-NEXT: ori 7, 7, 13107
+; CHECK-NEXT: sldi 4, 4, 32
+; CHECK-NEXT: sldi 5, 5, 32
+; CHECK-NEXT: oris 4, 4, 43690
+; CHECK-NEXT: oris 5, 5, 21845
+; CHECK-NEXT: ori 4, 4, 43690
+; CHECK-NEXT: ori 5, 5, 21845
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: sldi 5, 6, 32
+; CHECK-NEXT: sldi 6, 7, 32
+; CHECK-NEXT: and 4, 8, 4
+; CHECK-NEXT: lis 7, 3855
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: oris 12, 5, 52428
+; CHECK-NEXT: oris 9, 6, 13107
+; CHECK-NEXT: lis 6, -3856
+; CHECK-NEXT: ori 7, 7, 3855
+; CHECK-NEXT: sldi 8, 3, 2
+; CHECK-NEXT: ori 4, 12, 52428
+; CHECK-NEXT: rldicl 3, 3, 62, 2
+; CHECK-NEXT: ori 5, 9, 13107
+; CHECK-NEXT: ori 6, 6, 61680
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: sldi 5, 6, 32
+; CHECK-NEXT: and 4, 8, 4
+; CHECK-NEXT: sldi 6, 7, 32
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: oris 10, 5, 61680
+; CHECK-NEXT: oris 11, 6, 3855
+; CHECK-NEXT: sldi 6, 3, 4
+; CHECK-NEXT: ori 4, 10, 61680
+; CHECK-NEXT: rldicl 3, 3, 60, 4
+; CHECK-NEXT: ori 5, 11, 3855
+; CHECK-NEXT: and 4, 6, 4
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: rldicl 4, 3, 32, 32
+; CHECK-NEXT: rlwinm 6, 3, 24, 0, 31
+; CHECK-NEXT: rlwinm 5, 4, 24, 0, 31
+; CHECK-NEXT: rlwimi 6, 3, 8, 8, 15
+; CHECK-NEXT: rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT: rlwimi 6, 3, 8, 24, 31
+; CHECK-NEXT: rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT: sldi 12, 6, 32
+; CHECK-NEXT: or 3, 12, 5
+; CHECK-NEXT: blr
+entry:
+  %shr = lshr i64 %n, 1
+  %and = and i64 %shr, 6148914691236517205
+  %and1 = shl i64 %n, 1
+  %shl = and i64 %and1, -6148914691236517206
+  %or = or i64 %and, %shl
+  %shr2 = lshr i64 %or, 2
+  %and3 = and i64 %shr2, 3689348814741910323
+  %and4 = shl i64 %or, 2
+  %shl5 = and i64 %and4, -3689348814741910324
+  %or6 = or i64 %and3, %shl5
+  %shr7 = lshr i64 %or6, 4
+  %and8 = and i64 %shr7, 1085102592571150095
+  %and9 = shl i64 %or6, 4
+  %shl10 = and i64 %and9, -1085102592571150096
+  %or11 = or i64 %and8, %shl10
+  %shr13 = lshr i64 %or11, 56
+  %and14 = lshr i64 %or11, 40
+  %shr15 = and i64 %and14, 65280
+  %and17 = lshr i64 %or11, 24
+  %shr18 = and i64 %and17, 16711680
+  %and20 = lshr i64 %or11, 8
+  %shr21 = and i64 %and20, 4278190080
+  %and23 = shl i64 %or11, 8
+  %shl24 = and i64 %and23, 1095216660480
+  %and26 = shl i64 %or11, 24
+  %shl27 = and i64 %and26, 280375465082880
+  %and29 = shl i64 %or11, 40
+  %shl30 = and i64 %and29, 71776119061217280
+  %shl33 = shl i64 %or11, 56
+  %or16 = or i64 %shl33, %shr13
+  %or19 = or i64 %or16, %shr15
+  %or22 = or i64 %or19, %shr18
+  %or25 = or i64 %or22, %shr21
+  %or28 = or i64 %or25, %shl24
+  %or31 = or i64 %or28, %shl27
+  %or34 = or i64 %or31, %shl30
+  ret i64 %or34
+}
Index: llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
+++ llvm/trunk/test/CodeGen/PowerPC/testBitReverse.ll
@@ -40,3 +40,66 @@
   %res = call i32 @llvm.bitreverse.i32(i32 %arg)
   ret i32 %res
 }
+
+declare i64 @llvm.bitreverse.i64(i64)
+define i64 @testBitReverseIntrinsicI64(i64 %arg) {
+; CHECK-LABEL: testBitReverseIntrinsicI64:
+; CHECK: # BB#0:
+; CHECK-NEXT: lis 4, -21846
+; CHECK-NEXT: lis 5, 21845
+; CHECK-NEXT: lis 6, -13108
+; CHECK-NEXT: lis 7, 13107
+; CHECK-NEXT: sldi 8, 3, 1
+; CHECK-NEXT: rldicl 3, 3, 63, 1
+; CHECK-NEXT: ori 4, 4, 43690
+; CHECK-NEXT: ori 5, 5, 21845
+; CHECK-NEXT: ori 6, 6, 52428
+; CHECK-NEXT: ori 7, 7, 13107
+; CHECK-NEXT: sldi 4, 4, 32
+; CHECK-NEXT: sldi 5, 5, 32
+; CHECK-NEXT: oris 4, 4, 43690
+; CHECK-NEXT: oris 5, 5, 21845
+; CHECK-NEXT: ori 4, 4, 43690
+; CHECK-NEXT: ori 5, 5, 21845
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: sldi 5, 6, 32
+; CHECK-NEXT: sldi 6, 7, 32
+; CHECK-NEXT: and 4, 8, 4
+; CHECK-NEXT: lis 7, 3855
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: oris 12, 5, 52428
+; CHECK-NEXT: oris 9, 6, 13107
+; CHECK-NEXT: lis 6, -3856
+; CHECK-NEXT: ori 7, 7, 3855
+; CHECK-NEXT: sldi 8, 3, 2
+; CHECK-NEXT: ori 4, 12, 52428
+; CHECK-NEXT: rldicl 3, 3, 62, 2
+; CHECK-NEXT: ori 5, 9, 13107
+; CHECK-NEXT: ori 6, 6, 61680
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: sldi 5, 6, 32
+; CHECK-NEXT: and 4, 8, 4
+; CHECK-NEXT: sldi 6, 7, 32
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: oris 10, 5, 61680
+; CHECK-NEXT: oris 11, 6, 3855
+; CHECK-NEXT: sldi 6, 3, 4
+; CHECK-NEXT: ori 4, 10, 61680
+; CHECK-NEXT: rldicl 3, 3, 60, 4
+; CHECK-NEXT: ori 5, 11, 3855
+; CHECK-NEXT: and 4, 6, 4
+; CHECK-NEXT: and 3, 3, 5
+; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: rldicl 4, 3, 32, 32
+; CHECK-NEXT: rlwinm 6, 3, 24, 0, 31
+; CHECK-NEXT: rlwinm 5, 4, 24, 0, 31
+; CHECK-NEXT: rlwimi 6, 3, 8, 8, 15
+; CHECK-NEXT: rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT: rlwimi 6, 3, 8, 24, 31
+; CHECK-NEXT: rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT: sldi 12, 6, 32
+; CHECK-NEXT: or 3, 12, 5
+; CHECK-NEXT: blr
+  %res = call i64 @llvm.bitreverse.i64(i64 %arg)
+  ret i64 %res
+}