diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -636,6 +636,7 @@ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); // Comparisons that require checking two conditions. @@ -1325,7 +1326,6 @@ setMaxAtomicSizeInBitsSupported(128); setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); } setBooleanContents(ZeroOrOneBooleanContent); diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll --- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll @@ -2,6 +2,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \ ; RUN: -ppc-quadword-atomics < %s | FileCheck --check-prefix=P8 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \ +; RUN: -ppc-quadword-atomics < %s | FileCheck --check-prefix=P7 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \ +; RUN: < %s | FileCheck --check-prefix=P7 %s define dso_local i128 @lq_unordered(i128* %src) { ; P8-LABEL: lq_unordered: @@ -10,6 +16,24 @@ ; P8-NEXT: mr r3, r4 ; P8-NEXT: mr r4, r5 ; P8-NEXT: blr +; +; P7-LABEL: lq_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: 
.cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: li r4, 0 +; P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = load atomic i128, i128* %src unordered, align 16 ret i128 %0 @@ -24,6 +48,26 @@ ; P8-NEXT: mr r3, r4 ; P8-NEXT: mr r4, r5 ; P8-NEXT: blr +; +; P7-LABEL: lqx_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: sldi r4, r4, 4 +; P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: add r3, r3, r4 +; P7-NEXT: li r4, 0 +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = getelementptr i128, i128* %src, i64 %idx %1 = load atomic i128, i128* %0 unordered, align 16 @@ -39,6 +83,25 @@ ; P8-NEXT: mr r3, r4 ; P8-NEXT: mr r4, r5 ; P8-NEXT: blr +; +; P7-LABEL: lq_big_offset_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: addis r3, r3, 32 +; P7-NEXT: li r4, 0 +; P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = getelementptr i128, i128* %src, i64 131072 %1 = load atomic i128, i128* %0 unordered, align 16 @@ -52,6 +115,24 @@ ; P8-NEXT: mr r3, r4 ; P8-NEXT: mr r4, r5 ; P8-NEXT: blr +; +; P7-LABEL: lq_monotonic: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: li r4, 0 +; 
P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = load atomic i128, i128* %src monotonic, align 16 ret i128 %0 @@ -67,6 +148,27 @@ ; P8-NEXT: bne- cr7, .+4 ; P8-NEXT: isync ; P8-NEXT: blr +; +; P7-LABEL: lq_acquire: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: li r4, 0 +; P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: cmpd cr7, r4, r4 +; P7-NEXT: bne- cr7, .+4 +; P7-NEXT: isync +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = load atomic i128, i128* %src acquire, align 16 ret i128 %0 @@ -83,6 +185,28 @@ ; P8-NEXT: bne- cr7, .+4 ; P8-NEXT: isync ; P8-NEXT: blr +; +; P7-LABEL: lq_seqcst: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: li r4, 0 +; P7-NEXT: li r5, 0 +; P7-NEXT: li r6, 0 +; P7-NEXT: li r7, 0 +; P7-NEXT: sync +; P7-NEXT: bl __sync_val_compare_and_swap_16 +; P7-NEXT: nop +; P7-NEXT: cmpd cr7, r4, r4 +; P7-NEXT: bne- cr7, .+4 +; P7-NEXT: isync +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = load atomic i128, i128* %src seq_cst, align 16 ret i128 %0 @@ -95,6 +219,24 @@ ; P8-NEXT: mr r6, r3 ; P8-NEXT: stq r6, 0(r5) ; P8-NEXT: blr +; +; P7-LABEL: stq_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r6, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: mr r3, r5 +; P7-NEXT: mr r5, r6 +; P7-NEXT: bl 
__sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: store atomic i128 %val, i128* %dst unordered, align 16 ret void @@ -109,6 +251,25 @@ ; P8-NEXT: add r3, r5, r6 ; P8-NEXT: stq r8, 0(r3) ; P8-NEXT: blr +; +; P7-LABEL: stqx_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r7, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: sldi r3, r6, 4 +; P7-NEXT: add r3, r5, r3 +; P7-NEXT: mr r5, r7 +; P7-NEXT: bl __sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = getelementptr i128, i128* %dst, i64 %idx store atomic i128 %val, i128* %0 unordered, align 16 @@ -124,6 +285,24 @@ ; P8-NEXT: add r3, r5, r6 ; P8-NEXT: stq r8, 0(r3) ; P8-NEXT: blr +; +; P7-LABEL: stq_big_offset_unordered: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r6, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: addis r3, r5, 32 +; P7-NEXT: mr r5, r6 +; P7-NEXT: bl __sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: %0 = getelementptr i128, i128* %dst, i64 131072 store atomic i128 %val, i128* %0 unordered, align 16 @@ -137,6 +316,24 @@ ; P8-NEXT: mr r6, r3 ; P8-NEXT: stq r6, 0(r5) ; P8-NEXT: blr +; +; P7-LABEL: stq_monotonic: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r6, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: mr r3, r5 +; P7-NEXT: mr r5, r6 +; P7-NEXT: bl __sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 
16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: store atomic i128 %val, i128* %dst monotonic, align 16 ret void @@ -150,6 +347,25 @@ ; P8-NEXT: mr r6, r3 ; P8-NEXT: stq r6, 0(r5) ; P8-NEXT: blr +; +; P7-LABEL: stq_release: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r6, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: mr r3, r5 +; P7-NEXT: lwsync +; P7-NEXT: mr r5, r6 +; P7-NEXT: bl __sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: store atomic i128 %val, i128* %dst release, align 16 ret void @@ -163,6 +379,25 @@ ; P8-NEXT: mr r6, r3 ; P8-NEXT: stq r6, 0(r5) ; P8-NEXT: blr +; +; P7-LABEL: stq_seqcst: +; P7: # %bb.0: # %entry +; P7-NEXT: mflr r0 +; P7-NEXT: std r0, 16(r1) +; P7-NEXT: stdu r1, -112(r1) +; P7-NEXT: .cfi_def_cfa_offset 112 +; P7-NEXT: .cfi_offset lr, 16 +; P7-NEXT: mr r6, r4 +; P7-NEXT: mr r4, r3 +; P7-NEXT: mr r3, r5 +; P7-NEXT: sync +; P7-NEXT: mr r5, r6 +; P7-NEXT: bl __sync_lock_test_and_set_16 +; P7-NEXT: nop +; P7-NEXT: addi r1, r1, 112 +; P7-NEXT: ld r0, 16(r1) +; P7-NEXT: mtlr r0 +; P7-NEXT: blr entry: store atomic i128 %val, i128* %dst seq_cst, align 16 ret void diff --git a/llvm/test/CodeGen/PowerPC/cfence-i128.ll b/llvm/test/CodeGen/PowerPC/cfence-i128.ll --- a/llvm/test/CodeGen/PowerPC/cfence-i128.ll +++ b/llvm/test/CodeGen/PowerPC/cfence-i128.ll @@ -1,15 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; REQUIRES: asserts -; RUN: not --crash llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ -; RUN: < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN: < %s | FileCheck %s +; Check that llvm.ppc.cfence on an i128 operand is lowered to cmpd/bne-/isync +; instead of failing with "Do not know how to expand this operator's operand!". declare void @llvm.ppc.sync() declare void @llvm.ppc.cfence.i128(i128) define void @test_cfence(i128 %src) { +; CHECK-LABEL: test_cfence: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
sync +; CHECK-NEXT: cmpd 7, 4, 4 +; CHECK-NEXT: bne- 7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: blr entry: call void @llvm.ppc.sync() -; CHECK: ExpandIntegerOperand Op{{.*}}llvm.ppc.cfence -; CHECK: LLVM ERROR: Do not know how to expand this operator's operand! call void @llvm.ppc.cfence.i128(i128 %src) ret void }