Index: include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- include/llvm/IR/IntrinsicsAArch64.td +++ include/llvm/IR/IntrinsicsAArch64.td @@ -38,12 +38,6 @@ def int_aarch64_hint : Intrinsic<[], [llvm_i32_ty]>; //===----------------------------------------------------------------------===// -// RBIT - -def int_aarch64_rbit : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], - [IntrNoMem]>; - -//===----------------------------------------------------------------------===// // Data Barrier Instructions def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, Intrinsic<[], [llvm_i32_ty]>; Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -77,6 +77,11 @@ switch (Name[0]) { default: break; case 'a': { + if (Name.startswith("aarch64.rbit")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, + F->arg_begin()->getType()); + return true; + } if (Name.startswith("arm.neon.vclz")) { Type* args[2] = { F->arg_begin()->getType(), @@ -1761,6 +1766,11 @@ return; } + case Intrinsic::bitreverse: + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); + CI->eraseFromParent(); + return; + case Intrinsic::ctlz: case Intrinsic::cttz: assert(CI->getNumArgOperands() == 1 && Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -159,6 +159,8 @@ setOperationAction(ISD::SETCC, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -951,10 +951,7 @@ defm CLS : OneOperandData<0b101, "cls">; defm CLZ : OneOperandData<0b100, "clz", ctlz>; -defm RBIT : OneOperandData<0b000, "rbit">; - -def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>; -def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>; +defm RBIT : OneOperandData<0b000, "rbit", bitreverse>; def REV16Wr : OneWRegData<0b001, "rev16", UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; Index: test/CodeGen/AArch64/bitreverse.ll =================================================================== --- test/CodeGen/AArch64/bitreverse.ll +++ test/CodeGen/AArch64/bitreverse.ll @@ -1,14 +1,18 @@ ; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s -; These tests just check that the plumbing is in place for @llvm.bitreverse. The -; actual output is massive at the moment as llvm.bitreverse is not yet legal. +; These tests just check that the plumbing is in place for @llvm.bitreverse. declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone define <2 x i16> @f(<2 x i16> %a) { ; CHECK-LABEL: f: -; CHECK: rev32 -; CHECK: ushr +; CHECK: fmov [[REG1:w[0-9]+]], s0 +; CHECK-DAG: rbit [[REG2:w[0-9]+]], [[REG1]] +; CHECK-DAG: fmov s0, [[REG2]] +; CHECK-DAG: mov [[REG3:w[0-9]+]], v0.s[1] +; CHECK-DAG: rbit [[REG4:w[0-9]+]], [[REG3]] +; CHECK-DAG: ins v0.s[1], [[REG4]] +; CHECK-DAG: ushr v0.2s, v0.2s, #16 %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) ret <2 x i16> %b } @@ -17,26 +21,9 @@ define i8 @g(i8 %a) { ; CHECK-LABEL: g: -; CHECK-DAG: rev [[RV:w.*]], w0 -; CHECK-DAG: and [[L4:w.*]], [[RV]], #0xf0f0f0f -; CHECK-DAG: and [[H4:w.*]], [[RV]], #0xf0f0f0f0 -; CHECK-DAG: lsr [[S4:w.*]], [[H4]], #4 -; CHECK-DAG: orr [[R4:w.*]], [[S4]], [[L4]], lsl #4 - -; CHECK-DAG: and [[L2:w.*]], [[R4]], #0x33333333 -; CHECK-DAG: and [[H2:w.*]], [[R4]], #0xcccccccc -; CHECK-DAG: lsr [[S2:w.*]], [[H2]], #2 -; CHECK-DAG: orr [[R2:w.*]], [[S2]], [[L2]], lsl #2 - -; CHECK-DAG: mov [[P1:w.*]], #1426063360 -; CHECK-DAG: mov [[N1:w.*]], #-1442840576 -; CHECK-DAG: and [[L1:w.*]], [[R2]], [[P1]] -; CHECK-DAG: and [[H1:w.*]], [[R2]], [[N1]] -; CHECK-DAG: lsr [[S1:w.*]], [[H1]], #1 -; CHECK-DAG: orr [[R1:w.*]], [[S1]], [[L1]], lsl #1 - -; CHECK-DAG: lsr w0, [[R1]], #24 -; CHECK-DAG: ret +; CHECK: rbit [[REG:w[0-9]+]], w0 +; CHECK-NEXT: lsr w0, [[REG]], #24 +; CHECK-NEXT: ret %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b } Index: test/CodeGen/AArch64/rbit.ll =================================================================== --- test/CodeGen/AArch64/rbit.ll +++ test/CodeGen/AArch64/rbit.ll @@ -1,5 +1,8 @@ ; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s +;; The first two checks make sure we can lower the target-specific intrinsic to +;; a BITREVERSE DAG node. + ; CHECK-LABEL: rbit32 ; CHECK: rbit w0, w0 define i32 @rbit32(i32 %t) { @@ -18,3 +21,22 @@ declare i64 @llvm.aarch64.rbit.i64(i64) declare i32 @llvm.aarch64.rbit.i32(i32) + +; CHECK-LABEL: rbit_generic32 +; CHECK: rbit w0, w0 +define i32 @rbit_generic32(i32 %t) { +entry: + %rbit = call i32 @llvm.bitreverse.i32(i32 %t) + ret i32 %rbit +} + +; CHECK-LABEL: rbit_generic64 +; CHECK: rbit x0, x0 +define i64 @rbit_generic64(i64 %t) { +entry: + %rbit = call i64 @llvm.bitreverse.i64(i64 %t) + ret i64 %rbit +} + +declare i32 @llvm.bitreverse.i32(i32) readnone +declare i64 @llvm.bitreverse.i64(i64) readnone