Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -136,6 +136,9 @@ addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } + // Match BITREVERSE to customized fast code sequence in the td file. + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -4450,3 +4450,71 @@ def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; } // IsISA3_0 + +// Fast reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): +// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): +// n' = (n rotl 24); After which n' = [B4, B1, B2, B3] +// Step 4.2: Insert B3 to the right position: +// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] +// Step 4.3: Insert B1 to the right position: +// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] +def MaskValues { + dag Lo1 = (ORI (LIS 0x5555), 0x5555); + dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); + dag Lo2 = (ORI (LIS 0x3333), 0x3333); + dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); + dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); + dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); +} + +def Shift1 { + dag Right = (RLWINM $A, 31, 1, 31); + dag Left = (RLWINM $A, 1, 0, 30); +} + +def Swap1 { + dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), + (AND Shift1.Left, MaskValues.Hi1)); +} + +def Shift2 { + dag Right = (RLWINM Swap1.Bit, 30, 2, 31); + dag Left = (RLWINM Swap1.Bit, 2, 0, 29); +} + +def Swap2 { + dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), + (AND Shift2.Left, MaskValues.Hi2)); +} + +def Shift4 { + dag Right = (RLWINM Swap2.Bits, 28, 4, 31); + dag Left = (RLWINM Swap2.Bits, 4, 0, 27); +} + +def Swap4 { + dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), + (AND Shift4.Left, MaskValues.Hi4)); +} + +def Rotate { + dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); +} + +def RotateInsertByte3 { + dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); +} + +def RotateInsertByte1 { + dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); +} + +def : Pat<(i32 (bitreverse i32:$A)), + (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; Index: test/CodeGen/PowerPC/bitreverse.ll =================================================================== --- test/CodeGen/PowerPC/bitreverse.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc -verify-machineinstrs -march=ppc64 %s -o - | FileCheck %s - -; These tests just check that the plumbing is in place for @llvm.bitreverse. The -; actual output is massive at the moment as llvm.bitreverse is not yet legal. - -declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone - -define <2 x i16> @f(<2 x i16> %a) { -; CHECK-LABEL: f: -; CHECK: rlwinm - %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) - ret <2 x i16> %b -} - -declare i8 @llvm.bitreverse.i8(i8) readnone - -define i8 @g(i8 %a) { -; CHECK-LABEL: g: -; CHECK: rlwinm -; CHECK: rlwimi - %b = call i8 @llvm.bitreverse.i8(i8 %a) - ret i8 %b -} Index: test/CodeGen/PowerPC/pr33093.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/pr33093.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +define zeroext i32 @ReverseBits(i32 zeroext %n) { +; CHECK-LABEL: ReverseBits: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: lis 4, -21846 +; CHECK-NEXT: lis 5, 21845 +; CHECK-NEXT: slwi 6, 3, 1 +; CHECK-NEXT: srwi 3, 3, 1 +; CHECK-NEXT: lis 7, -13108 +; CHECK-NEXT: lis 8, 13107 +; CHECK-NEXT: ori 4, 4, 43690 +; CHECK-NEXT: ori 5, 5, 21845 +; CHECK-NEXT: lis 10, -3856 +; CHECK-NEXT: lis 11, 3855 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: ori 5, 8, 13107 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: ori 4, 7, 52428 +; CHECK-NEXT: slwi 9, 3, 2 +; CHECK-NEXT: srwi 3, 3, 2 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: and 4, 9, 4 +; CHECK-NEXT: ori 5, 11, 3855 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: ori 4, 10, 61680 +; CHECK-NEXT: slwi 12, 3, 4 +; CHECK-NEXT: srwi 3, 3, 4 +; CHECK-NEXT: and 4, 12, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: rotlwi 4, 3, 24 +; CHECK-NEXT: rlwimi 4, 3, 8, 8, 15 +; CHECK-NEXT: rlwimi 4, 3, 8, 24, 31 +; CHECK-NEXT: rldicl 3, 4, 0, 32 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +entry: + %shr = lshr i32 %n, 1 + %and = and i32 %shr, 1431655765 + %and1 = shl i32 %n, 1 + %shl = and i32 %and1, -1431655766 + %or = or i32 %and, %shl + %shr2 = lshr i32 %or, 2 + %and3 = and i32 %shr2, 858993459 + %and4 = shl i32 %or, 2 + %shl5 = and i32 %and4, -858993460 + %or6 = or i32 %and3, %shl5 + %shr7 = lshr i32 %or6, 4 + %and8 = and i32 %shr7, 252645135 + %and9 = shl i32 %or6, 4 + %shl10 = and i32 %and9, -252645136 + %or11 = or i32 %and8, %shl10 + %shr13 = lshr i32 %or11, 24 + %and14 = lshr i32 %or11, 8 + %shr15 = and i32 %and14, 65280 + %and17 = shl i32 %or11, 8 + %shl18 = and i32 %and17, 16711680 + %shl21 = shl i32 %or11, 24 + %or16 = or i32 %shl21, %shr13 + %or19 = or i32 %or16, %shr15 + %or22 = or i32 %or19, %shl18 + ret i32 %or22 +} Index: test/CodeGen/PowerPC/testBitReverse.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/testBitReverse.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +declare i32 @llvm.bitreverse.i32(i32) +define i32 @testBitReverseIntrinsicI32(i32 %arg) { +; CHECK-LABEL: testBitReverseIntrinsicI32: +; CHECK: # BB#0: +; CHECK-NEXT: lis 4, -21846 +; CHECK-NEXT: lis 5, 21845 +; CHECK-NEXT: slwi 6, 3, 1 +; CHECK-NEXT: srwi 3, 3, 1 +; CHECK-NEXT: lis 7, -13108 +; CHECK-NEXT: lis 8, 13107 +; CHECK-NEXT: ori 4, 4, 43690 +; CHECK-NEXT: ori 5, 5, 21845 +; CHECK-NEXT: lis 10, -3856 +; CHECK-NEXT: lis 11, 3855 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: ori 5, 8, 13107 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: ori 4, 7, 52428 +; CHECK-NEXT: slwi 9, 3, 2 +; CHECK-NEXT: srwi 3, 3, 2 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: and 4, 9, 4 +; CHECK-NEXT: ori 5, 11, 3855 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: ori 4, 10, 61680 +; CHECK-NEXT: slwi 12, 3, 4 +; CHECK-NEXT: srwi 3, 3, 4 +; CHECK-NEXT: and 4, 12, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: rotlwi 4, 3, 24 +; CHECK-NEXT: rlwimi 4, 3, 8, 8, 15 +; CHECK-NEXT: rlwimi 4, 3, 8, 24, 31 +; CHECK-NEXT: rldicl 3, 4, 0, 32 +; CHECK-NEXT: blr + %res = call i32 @llvm.bitreverse.i32(i32 %arg) + ret i32 %res +}