diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11225,6 +11225,7 @@ Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC); Register TmpDestReg = RegInfo.createVirtualRegister(GPRC); + Register SrwDestReg = RegInfo.createVirtualRegister(GPRC); Register Ptr1Reg; Register TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC); @@ -11252,7 +11253,8 @@ // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB - // srw dest, tmpDest, shift + // srw SrwDest, tmpDest, shift + // rlwinm dest, SrwDest, 0, 24 [16], 31 if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) @@ -11354,7 +11356,14 @@ // exitMBB: // ... BB = exitMBB; - BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) + // Since the shift amount is not a constant, we need to clear + // the upper bits with a separate RLWINM. + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest) + .addReg(SrwDestReg) + .addImm(0) + .addImm(is8bit ? 
24 : 16) + .addImm(31); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg) .addReg(TmpDestReg) .addReg(ShiftReg); return BB; diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -360,6 +360,7 @@ ; PPC32-NEXT: bne cr0, .LBB12_1 ; PPC32-NEXT: # %bb.2: ; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: clrlwi r3, r3, 24 ; PPC32-NEXT: blr ; ; PPC64-LABEL: add_i8_monotonic: @@ -380,6 +381,7 @@ ; PPC64-NEXT: bne cr0, .LBB12_1 ; PPC64-NEXT: # %bb.2: ; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: clrlwi r3, r3, 24 ; PPC64-NEXT: blr %val = atomicrmw add i8* %mem, i8 %operand monotonic ret i8 %val @@ -405,6 +407,7 @@ ; PPC32-NEXT: bne cr0, .LBB13_1 ; PPC32-NEXT: # %bb.2: ; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: clrlwi r3, r3, 16 ; PPC32-NEXT: lwsync ; PPC32-NEXT: blr ; @@ -428,6 +431,7 @@ ; PPC64-NEXT: bne cr0, .LBB13_1 ; PPC64-NEXT: # %bb.2: ; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: clrlwi r3, r3, 16 ; PPC64-NEXT: lwsync ; PPC64-NEXT: blr %val = atomicrmw xor i16* %mem, i16 %operand seq_cst diff --git a/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 %s -o - | FileCheck %s --check-prefix=PWR7 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr9 %s -o - | FileCheck %s --check-prefix=PWR9 + +@value8 = dso_local global { i8 } zeroinitializer, align 1 +@value16 = dso_local global { i16 } zeroinitializer, align 2 +@global_int = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local zeroext i32 @testI8(i8 zeroext %val) local_unnamed_addr #0 { +; PWR7-LABEL: testI8: +; PWR7: # %bb.0: # %entry +; PWR7-NEXT: addis 4, 2, 
value8@toc@ha +; PWR7-NEXT: li 6, 255 +; PWR7-NEXT: sync +; PWR7-NEXT: addi 5, 4, value8@toc@l +; PWR7-NEXT: rlwinm 4, 5, 3, 27, 28 +; PWR7-NEXT: rldicr 5, 5, 0, 61 +; PWR7-NEXT: xori 4, 4, 24 +; PWR7-NEXT: slw 7, 3, 4 +; PWR7-NEXT: slw 3, 6, 4 +; PWR7-NEXT: and 6, 7, 3 +; PWR7-NEXT: .LBB0_1: # %entry +; PWR7-NEXT: # +; PWR7-NEXT: lwarx 7, 0, 5 +; PWR7-NEXT: andc 8, 7, 3 +; PWR7-NEXT: or 8, 6, 8 +; PWR7-NEXT: stwcx. 8, 0, 5 +; PWR7-NEXT: bne 0, .LBB0_1 +; PWR7-NEXT: # %bb.2: # %entry +; PWR7-NEXT: srw 3, 7, 4 +; PWR7-NEXT: addis 5, 2, global_int@toc@ha +; PWR7-NEXT: lwsync +; PWR7-NEXT: clrlwi 4, 3, 24 +; PWR7-NEXT: li 3, 55 +; PWR7-NEXT: stw 4, global_int@toc@l(5) +; PWR7-NEXT: blr +; +; PWR9-LABEL: testI8: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 4, 2, value8@toc@ha +; PWR9-NEXT: sync +; PWR9-NEXT: addi 5, 4, value8@toc@l +; PWR9-NEXT: .LBB0_1: # %entry +; PWR9-NEXT: # +; PWR9-NEXT: lbarx 4, 0, 5 +; PWR9-NEXT: stbcx. 3, 0, 5 +; PWR9-NEXT: bne 0, .LBB0_1 +; PWR9-NEXT: # %bb.2: # %entry +; PWR9-NEXT: clrlwi 3, 4, 24 +; PWR9-NEXT: addis 4, 2, global_int@toc@ha +; PWR9-NEXT: lwsync +; PWR9-NEXT: stw 3, global_int@toc@l(4) +; PWR9-NEXT: li 3, 55 +; PWR9-NEXT: blr +entry: + %0 = atomicrmw xchg i8* getelementptr inbounds ({ i8 }, { i8 }* @value8, i64 0, i32 0), i8 %val seq_cst, align 1 + %conv = zext i8 %0 to i32 + store i32 %conv, i32* @global_int, align 4 + ret i32 55 +} + +define dso_local zeroext i32 @testI16(i16 zeroext %val) local_unnamed_addr #0 { +; PWR7-LABEL: testI16: +; PWR7: # %bb.0: # %entry +; PWR7-NEXT: addis 4, 2, value16@toc@ha +; PWR7-NEXT: li 6, 0 +; PWR7-NEXT: sync +; PWR7-NEXT: addi 5, 4, value16@toc@l +; PWR7-NEXT: ori 6, 6, 65535 +; PWR7-NEXT: rlwinm 4, 5, 3, 27, 27 +; PWR7-NEXT: rldicr 5, 5, 0, 61 +; PWR7-NEXT: xori 4, 4, 16 +; PWR7-NEXT: slw 7, 3, 4 +; PWR7-NEXT: slw 3, 6, 4 +; PWR7-NEXT: and 6, 7, 3 +; PWR7-NEXT: .LBB1_1: # %entry +; PWR7-NEXT: # +; PWR7-NEXT: lwarx 7, 0, 5 +; PWR7-NEXT: andc 8, 7, 3 +; PWR7-NEXT: or 8, 6, 8 +; PWR7-NEXT: 
stwcx. 8, 0, 5 +; PWR7-NEXT: bne 0, .LBB1_1 +; PWR7-NEXT: # %bb.2: # %entry +; PWR7-NEXT: srw 3, 7, 4 +; PWR7-NEXT: addis 5, 2, global_int@toc@ha +; PWR7-NEXT: lwsync +; PWR7-NEXT: clrlwi 4, 3, 16 +; PWR7-NEXT: li 3, 55 +; PWR7-NEXT: stw 4, global_int@toc@l(5) +; PWR7-NEXT: blr +; +; PWR9-LABEL: testI16: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 4, 2, value16@toc@ha +; PWR9-NEXT: sync +; PWR9-NEXT: addi 5, 4, value16@toc@l +; PWR9-NEXT: .LBB1_1: # %entry +; PWR9-NEXT: # +; PWR9-NEXT: lharx 4, 0, 5 +; PWR9-NEXT: sthcx. 3, 0, 5 +; PWR9-NEXT: bne 0, .LBB1_1 +; PWR9-NEXT: # %bb.2: # %entry +; PWR9-NEXT: clrlwi 3, 4, 16 +; PWR9-NEXT: addis 4, 2, global_int@toc@ha +; PWR9-NEXT: lwsync +; PWR9-NEXT: stw 3, global_int@toc@l(4) +; PWR9-NEXT: li 3, 55 +; PWR9-NEXT: blr +entry: + %0 = atomicrmw xchg i16* getelementptr inbounds ({ i16 }, { i16 }* @value16, i64 0, i32 0), i16 %val seq_cst, align 2 + %conv = zext i16 %0 to i32 + store i32 %conv, i32* @global_int, align 4 + ret i32 55 +} + +attributes #0 = { nounwind }