diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -31,6 +31,8 @@ // builtins for compatibility with the XL compiler BUILTIN(__builtin_ppc_popcntb, "ULiULi", "") +BUILTIN(__builtin_ppc_poppar4, "iUi", "") +BUILTIN(__builtin_ppc_poppar8, "iULLi", "") BUILTIN(__builtin_ppc_eieio, "v", "") BUILTIN(__builtin_ppc_iospace_eieio, "v", "") BUILTIN(__builtin_ppc_isync, "v", "") @@ -87,6 +89,14 @@ BUILTIN(__builtin_ppc_rlwnm, "UiUiIUiIUi", "") BUILTIN(__builtin_ppc_rlwimi, "UiUiUiIUiIUi", "") BUILTIN(__builtin_ppc_rldimi, "ULLiULLiULLiIUiIULLi", "") +// load +BUILTIN(__builtin_ppc_load2r, "UiUs*", "") +BUILTIN(__builtin_ppc_load4r, "UiUi*", "") +BUILTIN(__builtin_ppc_load8r, "ULLiULLi*", "") +// store +BUILTIN(__builtin_ppc_store2r, "vUiUs*", "") +BUILTIN(__builtin_ppc_store4r, "vUiUi*", "") +BUILTIN(__builtin_ppc_store8r, "vULLiULLi*", "") BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -89,6 +89,8 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__popcntb", "__builtin_ppc_popcntb"); + Builder.defineMacro("__poppar4", "__builtin_ppc_poppar4"); + Builder.defineMacro("__poppar8", "__builtin_ppc_poppar8"); Builder.defineMacro("__eieio", "__builtin_ppc_eieio"); Builder.defineMacro("__iospace_eieio", "__builtin_ppc_iospace_eieio"); Builder.defineMacro("__isync", "__builtin_ppc_isync"); @@ -143,6 +145,12 @@ Builder.defineMacro("__rlwnm", "__builtin_ppc_rlwnm"); Builder.defineMacro("__rlwimi", "__builtin_ppc_rlwimi"); Builder.defineMacro("__rldimi", "__builtin_ppc_rldimi"); + Builder.defineMacro("__load2r", "__builtin_ppc_load2r"); + Builder.defineMacro("__load4r", "__builtin_ppc_load4r"); + Builder.defineMacro("__load8r", "__builtin_ppc_load8r"); 
+ Builder.defineMacro("__store2r", "__builtin_ppc_store2r"); + Builder.defineMacro("__store4r", "__builtin_ppc_store4r"); + Builder.defineMacro("__store8r", "__builtin_ppc_store8r"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15197,6 +15197,20 @@ Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]}); return Builder.CreateAnd(Shift, Ops[2]); } + case PPC::BI__builtin_ppc_poppar4: + case PPC::BI__builtin_ppc_poppar8: { + llvm::Type *ArgType = Ops[0]->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Value *Tmp = Builder.CreateCall(F, Ops[0]); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return Result; + } + // Copy sign case PPC::BI__builtin_vsx_xvcpsgnsp: case PPC::BI__builtin_vsx_xvcpsgndp: { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3276,6 +3276,8 @@ case PPC::BI__builtin_ppc_maddhd: case PPC::BI__builtin_ppc_maddhdu: case PPC::BI__builtin_ppc_maddld: + case PPC::BI__builtin_ppc_load8r: + case PPC::BI__builtin_ppc_store8r: return true; } return false; diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c b/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c @@ -0,0 +1,35 @@ +// REQUIRES: powerpc-registered-target 
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: not %clang_cc1 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 2>&1 | FileCheck %s --check-prefix=CHECK-32-ERROR + +extern unsigned long long ull; +extern unsigned long long *ull_addr; + +// CHECK-LABEL: @test_builtin_ppc_store8r( +// CHECK: [[TMP0:%.*]] = load i64, i64* @ull, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** @ull_addr, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.ppc.store8r(i64 [[TMP0]], i8* [[TMP2]]) +// CHECK-NEXT: ret void + +// CHECK-32-ERROR: error: this builtin is only available on 64-bit targets +void test_builtin_ppc_store8r() { + __builtin_ppc_store8r(ull, ull_addr); +} + +// CHECK-LABEL: @test_builtin_ppc_load8r( +// CHECK: [[TMP0:%.*]] = load i64*, i64** @ull_addr, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ppc.load8r(i8* [[TMP1]]) +// CHECK-NEXT: ret i64 [[TMP2]] + +// CHECK-32-ERROR: error: this builtin is only available on 64-bit targets +unsigned long long test_builtin_ppc_load8r() { + return __builtin_ppc_load8r(ull_addr); +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed.c b/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed.c @@ -0,0 +1,86 @@ +// REQUIRES: powerpc-registered-target 
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s --check-prefixes=CHECK-32B +// RUN: %clang_cc1 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s + +extern unsigned short us; +extern unsigned int ui; +extern unsigned short *us_addr; +extern unsigned int *ui_addr; + +// CHECK-LABEL: @test_builtin_ppc_store2r( +// CHECK: [[TMP0:%.*]] = load i16, i16* @us, align 2 +// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** @us_addr, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.ppc.store2r(i32 [[CONV]], i8* [[TMP2]]) +// CHECK-NEXT: ret void +// +// CHECK-32B-LABEL: @test_builtin_ppc_store2r( +// CHECK-32B: [[TMP0:%.*]] = load i16, i16* @us, align 2 +// CHECK-32B-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32 +// CHECK-32B-NEXT: [[TMP1:%.*]] = load i16*, i16** @us_addr, align 4 +// CHECK-32B-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to i8* +// CHECK-32B-NEXT: call void @llvm.ppc.store2r(i32 [[CONV]], i8* [[TMP2]]) +// CHECK-32B-NEXT: ret void +// +void test_builtin_ppc_store2r() { + __builtin_ppc_store2r(us, us_addr); +} + +// CHECK-LABEL: @test_builtin_ppc_store4r( +// CHECK: [[TMP0:%.*]] = load i32, i32* @ui, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @ui_addr, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.ppc.store4r(i32 [[TMP0]], i8* [[TMP2]]) +// CHECK-NEXT: ret void +// +// CHECK-32B-LABEL: @test_builtin_ppc_store4r( +// CHECK-32B: [[TMP0:%.*]] = load i32, i32* @ui, align 4 +// CHECK-32B-NEXT: [[TMP1:%.*]] = load i32*, i32** @ui_addr, align 4 +// CHECK-32B-NEXT: 
[[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* +// CHECK-32B-NEXT: call void @llvm.ppc.store4r(i32 [[TMP0]], i8* [[TMP2]]) +// CHECK-32B-NEXT: ret void +// +void test_builtin_ppc_store4r() { + __builtin_ppc_store4r(ui, ui_addr); +} + +// CHECK-LABEL: @test_builtin_ppc_load2r( +// CHECK: [[TMP0:%.*]] = load i16*, i16** @us_addr, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.load2r(i8* [[TMP1]]) +// CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[TMP2]] to i16 +// CHECK-NEXT: ret i16 [[CONV]] +// +// CHECK-32B-LABEL: @test_builtin_ppc_load2r( +// CHECK-32B: [[TMP0:%.*]] = load i16*, i16** @us_addr, align 4 +// CHECK-32B-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP0]] to i8* +// CHECK-32B-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.load2r(i8* [[TMP1]]) +// CHECK-32B-NEXT: [[CONV:%.*]] = trunc i32 [[TMP2]] to i16 +// CHECK-32B-NEXT: ret i16 [[CONV]] +// +unsigned short test_builtin_ppc_load2r() { + return __builtin_ppc_load2r(us_addr); +} + +// CHECK-LABEL: @test_builtin_ppc_load4r( +// CHECK: [[TMP0:%.*]] = load i32*, i32** @ui_addr, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.load4r(i8* [[TMP1]]) +// CHECK-NEXT: ret i32 [[TMP2]] +// +// CHECK-32B-LABEL: @test_builtin_ppc_load4r( +// CHECK-32B: [[TMP0:%.*]] = load i32*, i32** @ui_addr, align 4 +// CHECK-32B-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* +// CHECK-32B-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.load4r(i8* [[TMP1]]) +// CHECK-32B-NEXT: ret i32 [[TMP2]] +// +unsigned int test_builtin_ppc_load4r() { + return __builtin_ppc_load4r(ui_addr); +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-popcnt.c b/clang/test/CodeGen/builtins-ppc-xlcompat-popcnt.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-popcnt.c @@ -0,0 +1,35 @@ +// REQUIRES: powerpc-registered-target 
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s + +extern unsigned int ui; +extern unsigned long long ull; + +// CHECK-LABEL: @test_builtin_ppc_poppar4( +// CHECK: [[TMP0:%.*]] = load i32, i32* @ui, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @ui, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int test_builtin_ppc_poppar4() { + return __builtin_ppc_poppar4(ui); +} + +// CHECK-LABEL: @test_builtin_ppc_poppar8( +// CHECK: [[TMP0:%.*]] = load i64, i64* @ull, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @ull, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 +// CHECK-NEXT: [[CAST:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK-NEXT: ret i32 [[CAST]] +// +int test_builtin_ppc_poppar8() { + return __builtin_ppc_poppar8(ull); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1598,6 +1598,26 @@ def int_ppc_maddld : GCCBuiltin<"__builtin_ppc_maddld">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + // load + def int_ppc_load2r + : GCCBuiltin<"__builtin_ppc_load2r">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_ppc_load4r + : GCCBuiltin<"__builtin_ppc_load4r">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; + def 
int_ppc_load8r + : GCCBuiltin<"__builtin_ppc_load8r">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; + // store + def int_ppc_store2r + : GCCBuiltin<"__builtin_ppc_store2r">, + Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>; + def int_ppc_store4r + : GCCBuiltin<"__builtin_ppc_store4r">, + Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>; + def int_ppc_store8r + : GCCBuiltin<"__builtin_ppc_store8r">, + Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], [IntrWriteMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1790,6 +1790,10 @@ (i64 (MULHD $a, $b))>; def : Pat<(i64 (int_ppc_mulhdu g8rc:$a, g8rc:$b)), (i64 (MULHDU $a, $b))>; +def : Pat<(int_ppc_load8r ForceXForm:$ptr), + (LDBRX ForceXForm:$ptr)>; +def : Pat<(int_ppc_store8r g8rc:$a, ForceXForm:$ptr), + (STDBRX g8rc:$a, ForceXForm:$ptr)>; } let Predicates = [IsISA3_0] in { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5274,6 +5274,16 @@ def : Pat<(i32 (int_ppc_mulhwu gprc:$a, gprc:$b)), (i32 (MULHWU $a, $b))>; +def : Pat<(int_ppc_load2r ForceXForm:$ptr), + (LHBRX ForceXForm:$ptr)>; +def : Pat<(int_ppc_load4r ForceXForm:$ptr), + (LWBRX ForceXForm:$ptr)>; +def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr), + (STHBRX gprc:$a, ForceXForm:$ptr)>; +def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr), + (STWBRX gprc:$a, ForceXForm:$ptr)>; + + // Fast 32-bit reverse bits algorithm: // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): // n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed-64bit-only.ll 
b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed-64bit-only.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed-64bit-only.ll @@ -0,0 +1,37 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s + +@ull = external global i64, align 8 +@ull_addr = external global i64*, align 8 + +define dso_local void @test_builtin_ppc_store8r() { +; CHECK-LABEL: test_builtin_ppc_store8r: +; CHECK: stdbrx 3, 0, 4 +; CHECK-NEXT: blr +; +entry: + %0 = load i64, i64* @ull, align 8 + %1 = load i64*, i64** @ull_addr, align 8 + %2 = bitcast i64* %1 to i8* + call void @llvm.ppc.store8r(i64 %0, i8* %2) + ret void +} + +declare void @llvm.ppc.store8r(i64, i8*) + +define dso_local i64 @test_builtin_ppc_load8r() { +; CHECK-LABEL: test_builtin_ppc_load8r: +; CHECK: ldbrx 3, 0, 3 +; CHECK-NEXT: blr +entry: + %0 = load i64*, i64** @ull_addr, align 8 + %1 = bitcast i64* %0 to i8* + %2 = call i64 @llvm.ppc.load8r(i8* %1) + ret i64 %2 +} + +declare i64 @llvm.ppc.load8r(i8*) diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed.ll @@ -0,0 +1,87 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-64B +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s 
--check-prefix=CHECK-32B +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B + +@us = external global i16, align 2 +@us_addr = external global i16*, align 8 +@ui = external global i32, align 4 +@ui_addr = external global i32*, align 8 + +define dso_local void @test_builtin_ppc_store2r() { +; CHECK-64B-LABEL: test_builtin_ppc_store2r: +; CHECK-64B: sthbrx 3, 0, 4 +; CHECK-64B-NEXT: blr + +; CHECK-32B-LABEL: test_builtin_ppc_store2r: +; CHECK-32B: sthbrx 3, 0, 4 +; CHECK-32B-NEXT: blr +entry: + %0 = load i16, i16* @us, align 2 + %conv = zext i16 %0 to i32 + %1 = load i16*, i16** @us_addr, align 8 + %2 = bitcast i16* %1 to i8* + call void @llvm.ppc.store2r(i32 %conv, i8* %2) + ret void +} + +declare void @llvm.ppc.store2r(i32, i8*) + +define dso_local void @test_builtin_ppc_store4r() { +; CHECK-64B-LABEL: test_builtin_ppc_store4r: +; CHECK-64B: stwbrx 3, 0, 4 +; CHECK-64B-NEXT: blr + +; CHECK-32B-LABEL: test_builtin_ppc_store4r: +; CHECK-32B: stwbrx 3, 0, 4 +; CHECK-32B-NEXT: blr +entry: + %0 = load i32, i32* @ui, align 4 + %1 = load i32*, i32** @ui_addr, align 8 + %2 = bitcast i32* %1 to i8* + call void @llvm.ppc.store4r(i32 %0, i8* %2) + ret void +} + +declare void @llvm.ppc.store4r(i32, i8*) + +define dso_local zeroext i16 @test_builtin_ppc_load2r() { +; CHECK-64B-LABEL: test_builtin_ppc_load2r: +; CHECK-64B: lhbrx 3, 0, 3 +; CHECK-64B-NEXT: clrldi 3, 3, 48 +; CHECK-64B-NEXT: blr + +; CHECK-32B-LABEL: test_builtin_ppc_load2r: +; CHECK-32B: lhbrx 3, 0, 3 +; CHECK-32B-NEXT: clrlwi 3, 3, 16 +; CHECK-32B-NEXT: blr +entry: + %0 = load i16*, i16** @us_addr, align 8 + %1 = bitcast i16* %0 to i8* + %2 = call i32 @llvm.ppc.load2r(i8* %1) + %conv = trunc i32 %2 to i16 + ret i16 %conv +} + +declare i32 @llvm.ppc.load2r(i8*) + +define dso_local zeroext i32 @test_builtin_ppc_load4r() { +; CHECK-64B-LABEL: test_builtin_ppc_load4r: +; CHECK-64B: lwbrx 3, 0, 3 +; CHECK-64B-NEXT: blr + +; 
CHECK-32B-LABEL: test_builtin_ppc_load4r: +; CHECK-32B: lwbrx 3, 0, 3 +; CHECK-32B-NEXT: blr +entry: + %0 = load i32*, i32** @ui_addr, align 8 + %1 = bitcast i32* %0 to i8* + %2 = call i32 @llvm.ppc.load4r(i8* %1) + ret i32 %2 +} + +declare i32 @llvm.ppc.load4r(i8*) diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-popcnt.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-popcnt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-popcnt.ll @@ -0,0 +1,51 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-64B +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32B +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-64B + +@ui = external global i32, align 4 +@ull = external global i64, align 8 + +define dso_local signext i32 @test_builtin_ppc_poppar4() { +; CHECK-32B-LABEL: test_builtin_ppc_poppar4: +; CHECK-32B: popcntw 3, 3 +; CHECK-32B-NEXT: clrlwi 3, 3, 31 +; CHECK-32B-NEXT: blr +; CHECK-64B-LABEL: test_builtin_ppc_poppar4: +; CHECK-64B: popcntw 3, 3 +; CHECK-64B-NEXT: clrlwi 3, 3, 31 +; CHECK-64B-NEXT: blr +entry: + %0 = load i32, i32* @ui, align 4 + %1 = load i32, i32* @ui, align 4 + %2 = call i32 @llvm.ctpop.i32(i32 %1) + %3 = and i32 %2, 1 + ret i32 %3 +} + +declare i32 @llvm.ctpop.i32(i32) + +define dso_local signext i32 @test_builtin_ppc_poppar8() { +; CHECK-32B-LABEL: test_builtin_ppc_poppar8: +; CHECK-32B: xor 3, 3, 4 +; CHECK-32B-NEXT: popcntw 3, 3 +; CHECK-32B-NEXT: clrlwi 3, 3, 31 +; CHECK-32B-NEXT: blr +; CHECK-64B-LABEL: test_builtin_ppc_poppar8: +; CHECK-64B: popcntd 3, 3 +; CHECK-64B-NEXT: clrldi 3, 3, 63 +; CHECK-64B-NEXT: blr 
+entry: + %0 = load i64, i64* @ull, align 8 + %1 = load i64, i64* @ull, align 8 + %2 = call i64 @llvm.ctpop.i64(i64 %1) + %3 = and i64 %2, 1 + %cast = trunc i64 %3 to i32 + ret i32 %cast +} + +declare i64 @llvm.ctpop.i64(i64)