diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -57,8 +57,11 @@ BUILTIN(__builtin_ppc_fetch_and_swaplp, "ULiULiD*ULi", "") BUILTIN(__builtin_ppc_ldarx, "LiLiD*", "") BUILTIN(__builtin_ppc_lwarx, "iiD*", "") +BUILTIN(__builtin_ppc_lharx, "isD*", "") +BUILTIN(__builtin_ppc_lbarx, "UiUcD*", "") BUILTIN(__builtin_ppc_stdcx, "iLiD*Li", "") BUILTIN(__builtin_ppc_stwcx, "iiD*i", "") +BUILTIN(__builtin_ppc_sthcx, "isD*s", "") BUILTIN(__builtin_ppc_tdw, "vLLiLLiIUi", "") BUILTIN(__builtin_ppc_tw, "viiIUi", "") BUILTIN(__builtin_ppc_trap, "vi", "") @@ -71,6 +74,10 @@ BUILTIN(__builtin_ppc_fctiwz, "dd", "") BUILTIN(__builtin_ppc_fctudz, "dd", "") BUILTIN(__builtin_ppc_fctuwz, "dd", "") +BUILTIN(__builtin_ppc_dcbtstt, "vv*", "") +BUILTIN(__builtin_ppc_dcbtt, "vv*", "") +BUILTIN(__builtin_ppc_mftbu, "Ui","") +BUILTIN(__builtin_ppc_mfmsr, "Ui", "") BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -116,8 +116,11 @@ Builder.defineMacro("__fetch_and_swaplp", "__builtin_ppc_fetch_and_swaplp"); Builder.defineMacro("__ldarx", "__builtin_ppc_ldarx"); Builder.defineMacro("__lwarx", "__builtin_ppc_lwarx"); + Builder.defineMacro("__lharx", "__builtin_ppc_lharx"); + Builder.defineMacro("__lbarx", "__builtin_ppc_lbarx"); Builder.defineMacro("__stdcx", "__builtin_ppc_stdcx"); Builder.defineMacro("__stwcx", "__builtin_ppc_stwcx"); + Builder.defineMacro("__sthcx", "__builtin_ppc_sthcx"); Builder.defineMacro("__tdw", "__builtin_ppc_tdw"); Builder.defineMacro("__tw", "__builtin_ppc_tw"); Builder.defineMacro("__trap", "__builtin_ppc_trap"); @@ -130,6 +133,10 @@ Builder.defineMacro("__fctiwz", "__builtin_ppc_fctiwz"); Builder.defineMacro("__fctudz", "__builtin_ppc_fctudz"); 
Builder.defineMacro("__fctuwz", "__builtin_ppc_fctuwz"); + Builder.defineMacro("__dcbtstt", "__builtin_ppc_dcbtstt"); + Builder.defineMacro("__dcbtt", "__builtin_ppc_dcbtt"); + Builder.defineMacro("__mftbu", "__builtin_ppc_mftbu"); + Builder.defineMacro("__mfmsr", "__builtin_ppc_mfmsr"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15464,6 +15464,13 @@ return Builder.CreateExtractElement(Unpacked, Index); } + case PPC::BI__builtin_ppc_sthcx: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); + Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); + Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty); + return Builder.CreateCall(F, Ops); + } + // The PPC MMA builtins take a pointer to a __vector_quad as an argument. // Some of the MMA instructions accumulate their result into an existing // accumulator whereas the others generate a new accumulator. 
So we need to diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c --- a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c @@ -8,13 +8,35 @@ // RUN: -o - | FileCheck %s int test_lwarx(volatile int* a) { - // CHECK: @test_lwarx + // CHECK-LABEL: @test_lwarx // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(i32* %a) return __lwarx(a); } + +short test_lharx(volatile short* a) { + // CHECK-LABEL: @test_lharx + // CHECK: %0 = bitcast i16* %a to i8* + // CHECK: %1 = tail call i32 @llvm.ppc.lharx(i8* %0) + return __lharx(a); +} + +char test_lbarx(volatile unsigned char* a) { + // CHECK-LABEL: @test_lbarx + // CHECK: %0 = tail call i32 @llvm.ppc.lbarx(i8* %a) + return __lbarx(a); +} + int test_stwcx(volatile int* a, int val) { - // CHECK: @test_stwcx + // CHECK-LABEL: @test_stwcx // CHECK: %0 = bitcast i32* %a to i8* // CHECK: %1 = tail call i32 @llvm.ppc.stwcx(i8* %0, i32 %val) return __stwcx(a, val); } + +int test_sthcx(volatile short* a, short val) { + // CHECK-LABEL: @test_sthcx + // CHECK: %0 = bitcast i16* %a to i8* + // CHECK: %1 = sext i16 %val to i32 + // CHECK: %2 = tail call i32 @llvm.ppc.sthcx(i8* %0, i32 %1) + return __sthcx(a, val); +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c b/clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -O2 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -O2 
-triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s + +unsigned int test_mftbu(void) { + // CHECK-LABEL: @test_mftbu + // CHECK: %0 = tail call i32 @llvm.ppc.mftbu() + return __mftbu(); +} + +unsigned long test_mfmsr(void) { + // CHECK-LABEL: @test_mfmsr + // CHECK: %0 = tail call i32 @llvm.ppc.mfmsr() + return __mfmsr(); +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c b/clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -O2 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -O2 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s + +extern void *vpa; + +void test_dcbtstt(void) { + // CHECK-LABEL: @test_dcbtstt + // CHECK: %0 = load i8*, i8** @vpa + // CHECK: tail call void @llvm.ppc.dcbtstt(i8* %0) + // CHECK: ret void + __dcbtstt(vpa); +} + +void test_dcbtt(void) { + // CHECK-LABEL: @test_dcbtt + // CHECK: %0 = load i8*, i8** @vpa + // CHECK: tail call void @llvm.ppc.dcbtt(i8* %0) + // CHECK: ret void + __dcbtt(vpa); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1565,5 +1565,17 @@ def int_ppc_stwcx : GCCBuiltin<"__builtin_ppc_stwcx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWriteMem]>; + def int_ppc_sthcx : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrWriteMem]>; + def int_ppc_lharx : GCCBuiltin<"__builtin_ppc_lharx">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], 
[IntrReadMem, IntrArgMemOnly]>; + def int_ppc_lbarx : GCCBuiltin<"__builtin_ppc_lbarx">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_ppc_dcbtstt : GCCBuiltin<"__builtin_ppc_dcbtstt">, + Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>; + def int_ppc_dcbtt : GCCBuiltin<"__builtin_ppc_dcbtt">, + Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>; + def int_ppc_mftbu : GCCBuiltin<"__builtin_ppc_mftbu">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_ppc_mfmsr : GCCBuiltin<"__builtin_ppc_mfmsr">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; } - diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -331,7 +331,8 @@ FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, - FeaturePredictableSelectIsExpensive + FeaturePredictableSelectIsExpensive, + FeatureISA2_07 ]; list P8SpecificFeatures = [FeatureAddiLoadFusion, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5436,3 +5436,19 @@ (XSCVDPUXDS $A)>; def : Pat<(int_ppc_fctuwz f64:$A), (XSCVDPUXWS $A)>; + +def : Pat<(int_ppc_mfmsr), (MFMSR)>; +def : Pat<(int_ppc_mftbu), (MFTB 269)>; + +let Predicates = [IsISA2_07] in { + def : Pat<(int_ppc_lharx xoaddr:$dst), + (LHARX xoaddr:$dst)>; + def : Pat<(int_ppc_lbarx xoaddr:$dst), + (LBARX xoaddr:$dst)>; + def : Pat<(int_ppc_sthcx xoaddr:$dst, gprc:$A), + (STHCX (EXTSH gprc:$A), xoaddr:$dst)>; +} +def : Pat<(int_ppc_dcbtstt xoaddr:$dst), + (DCBTST 16, xoaddr:$dst)>; +def : Pat<(int_ppc_dcbtt xoaddr:$dst), + (DCBT 16, xoaddr:$dst)>; diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll +++ 
b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll @@ -50,3 +50,67 @@ %1 = tail call i32 @llvm.ppc.stwcx(i8* %0, i32 %b) ret i32 %1 } + +declare i32 @llvm.ppc.sthcx(i8*, i32) +define dso_local signext i32 @test_sthcx(i16* %a, i16 signext %val) { +; CHECK-64-LABEL: test_sthcx: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: extsh 4, 4 +; CHECK-64-NEXT: sthcx. 4, 0, 3 +; CHECK-64-NEXT: mfocrf 3, 128 +; CHECK-64-NEXT: srwi 3, 3, 28 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_sthcx: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: extsh 4, 4 +; CHECK-32-NEXT: sthcx. 4, 0, 3 +; CHECK-32-NEXT: mfocrf 3, 128 +; CHECK-32-NEXT: srwi 3, 3, 28 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i16* %a to i8* + %1 = sext i16 %val to i32 + %2 = tail call i32 @llvm.ppc.sthcx(i8* %0, i32 %1) + ret i32 %2 +} + +declare i32 @llvm.ppc.lharx(i8*) +define dso_local signext i16 @test_lharx(i16* %a) local_unnamed_addr #0 { +; CHECK-64-LABEL: test_lharx: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lharx 3, 0, 3 +; CHECK-64-NEXT: extsh 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_lharx: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lharx 3, 0, 3 +; CHECK-32-NEXT: extsh 3, 3 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i16* %a to i8* + %1 = tail call i32 @llvm.ppc.lharx(i8* %0) + %conv = trunc i32 %1 to i16 + ret i16 %conv +} + +declare i32 @llvm.ppc.lbarx(i8*) +define dso_local zeroext i8 @test_lbarx(i8* %a) { +; CHECK-64-LABEL: test_lbarx: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lbarx 3, 0, 3 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_lbarx: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lbarx 3, 0, 3 +; CHECK-32-NEXT: clrlwi 3, 3, 24 +; CHECK-32-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.lbarx(i8* %a) + %conv = trunc i32 %0 to i8 + ret i8 %conv +} diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll 
b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s + +declare i32 @llvm.ppc.mftbu() +declare i32 @llvm.ppc.mfmsr() + +define dso_local zeroext i32 @test_mftbu() { +; CHECK-LABEL: test_mftbu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mftbu 3 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; +; CHECK-AIX-LABEL: test_mftbu: +; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: mftbu 3 +; CHECK-AIX-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.mftbu() + ret i32 %0 +} + +define dso_local i64 @test_mfmsr() { +; CHECK-LABEL: test_mfmsr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfmsr 3 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; +; CHECK-AIX-LABEL: test_mfmsr: +; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: mfmsr 4 +; CHECK-AIX-NEXT: li 3, 0 +; CHECK-AIX-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.mfmsr() + %conv = zext i32 %0 to i64 + ret i64 %conv +} diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-prefetch.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-prefetch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-prefetch.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64 + +declare void @llvm.ppc.dcbtstt(i8*) +declare void @llvm.ppc.dcbtt(i8*) + +@vpa = external local_unnamed_addr global i8*, align 8 + +define dso_local void @test_dcbtstt() { +; CHECK-LABEL: test_dcbtstt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LC0@toc@ha +; CHECK-NEXT: ld 3, .LC0@toc@l(3) +; CHECK-NEXT: ld 3, 0(3) +; CHECK-NEXT: dcbtstt 0, 3 +; CHECK-NEXT: blr +; +; CHECK-AIX-LABEL: test_dcbtstt: +; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: lwz 3, L..C0(2) # @vpa +; CHECK-AIX-NEXT: lwz 3, 0(3) +; CHECK-AIX-NEXT: dcbtstt 0, 3 +; CHECK-AIX-NEXT: blr +; +; CHECK-AIX64-LABEL: test_dcbtstt: +; CHECK-AIX64: # %bb.0: # %entry +; CHECK-AIX64-NEXT: ld 3, L..C0(2) # @vpa +; CHECK-AIX64-NEXT: ld 3, 0(3) +; CHECK-AIX64-NEXT: dcbtstt 0, 3 +; CHECK-AIX64-NEXT: blr +entry: + %0 = load i8*, i8** @vpa, align 8 + tail call void @llvm.ppc.dcbtstt(i8* %0) + ret void +} + + +define dso_local void @test_dcbtt() { +; CHECK-LABEL: test_dcbtt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LC0@toc@ha +; CHECK-NEXT: ld 3, .LC0@toc@l(3) +; CHECK-NEXT: ld 3, 0(3) +; CHECK-NEXT: dcbtt 0, 3 +; CHECK-NEXT: blr +; +; CHECK-AIX-LABEL: test_dcbtt: +; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: lwz 3, L..C0(2) # @vpa +; CHECK-AIX-NEXT: lwz 3, 0(3) +; CHECK-AIX-NEXT: dcbtt 0, 3 +; CHECK-AIX-NEXT: blr +; +; CHECK-AIX64-LABEL: test_dcbtt: +; CHECK-AIX64: # %bb.0: # %entry +; CHECK-AIX64-NEXT: ld 3, L..C0(2) # @vpa +; CHECK-AIX64-NEXT: ld 3, 0(3) +; CHECK-AIX64-NEXT: dcbtt 0, 3 +; CHECK-AIX64-NEXT: blr +entry: + %0 = load i8*, i8** @vpa, align 8 + tail call void @llvm.ppc.dcbtt(i8* %0) + ret void +}