Add ARM/AArch64-specific __builtin_arm_clz and __builtin_arm_clz64 builtins,
lower them to llvm.ctlz with the second (is_zero_poison) operand set to false,
and switch the arm_acle.h __clz/__clzl/__clzll wrappers over to them.

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -39,6 +39,8 @@
 BUILTIN(__builtin_arm_rbit64, "WUiWUi", "nc")
 BUILTIN(__builtin_arm_cls, "UiZUi", "nc")
 BUILTIN(__builtin_arm_cls64, "UiWUi", "nc")
+BUILTIN(__builtin_arm_clz, "UiZUi", "nc")
+BUILTIN(__builtin_arm_clz64, "UiWUi", "nc")
 
 // HINT
 BUILTIN(__builtin_arm_nop, "v", "")
diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def
--- a/clang/include/clang/Basic/BuiltinsARM.def
+++ b/clang/include/clang/Basic/BuiltinsARM.def
@@ -119,6 +119,8 @@
 
 // Bit manipulation
 BUILTIN(__builtin_arm_rbit, "UiUi", "nc")
+BUILTIN(__builtin_arm_clz, "UiZUi", "nc")
+BUILTIN(__builtin_arm_clz64, "UiWUi", "nc")
 BUILTIN(__builtin_arm_cls, "UiZUi", "nc")
 BUILTIN(__builtin_arm_cls64, "UiWUi", "nc")
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7948,6 +7948,19 @@
         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
   }
 
+  // llvm.ctlz's second operand is is_zero_poison; passing false keeps a zero
+  // input well defined. The 64-bit builtin counts in i64, so truncate to the
+  // i32 result that both builtins are declared to return.
+  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
+      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
+    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
+    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
+    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
+      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
+    return Res;
+  }
+
   if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
@@ -10030,6 +10043,19 @@
         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
   }
 
+  // llvm.ctlz's second operand is is_zero_poison; passing false keeps a zero
+  // input well defined. The 64-bit builtin counts in i64, so truncate to the
+  // i32 result that both builtins are declared to return.
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
+      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
+    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
+    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
+    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
+      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
+    return Res;
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -140,17 +140,22 @@
 /* CLZ */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __clz(uint32_t __t) {
-  return (uint32_t)__builtin_clz(__t);
+  return __builtin_arm_clz(__t);
 }
 
 static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
 __clzl(unsigned long __t) {
-  return (unsigned long)__builtin_clzl(__t);
+  /* Pick the builtin whose operand width matches unsigned long. */
+#if __SIZEOF_LONG__ == 4
+  return __builtin_arm_clz(__t);
+#else
+  return __builtin_arm_clz64(__t);
+#endif
 }
 
 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __clzll(uint64_t __t) {
-  return (uint64_t)__builtin_clzll(__t);
+  return __builtin_arm_clz64(__t);
 }
 
 /* CLS */
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -344,8 +344,8 @@
 // AArch64-LABEL: @test_clzl(
 // AArch64-NEXT:  entry:
 // AArch64-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[T:%.*]], i1 false)
-// AArch64-NEXT:    [[CAST_I:%.*]] = trunc i64 [[TMP0]] to i32
-// AArch64-NEXT:    [[CONV_I:%.*]] = sext i32 [[CAST_I]] to i64
+// AArch64-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// AArch64-NEXT:    [[CONV_I:%.*]] = zext i32 [[TMP1]] to i64
 // AArch64-NEXT:    ret i64 [[CONV_I]]
 //
 long test_clzl(long t) {
@@ -355,8 +355,8 @@
 // ARM-LABEL: @test_clzll(
 // ARM-NEXT:  entry:
 // ARM-NEXT:    [[TMP0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[T:%.*]], i1 false)
-// ARM-NEXT:    [[CAST_I:%.*]] = trunc i64 [[TMP0]] to i32
-// ARM-NEXT:    [[CONV_I:%.*]] = sext i32 [[CAST_I]] to i64
+// ARM-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// ARM-NEXT:    [[CONV_I:%.*]] = zext i32 [[TMP1]] to i64
 // ARM-NEXT:    ret i64 [[CONV_I]]
 //
 uint64_t test_clzll(uint64_t t) {