diff --git a/llvm/test/CodeGen/PowerPC/vector-promotion.ll b/llvm/test/CodeGen/PowerPC/vector-promotion.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vector-promotion.ll @@ -0,0 +1,2528 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - | FileCheck --check-prefix=ASM-P8 %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -stress-cgp-store-extract | FileCheck --check-prefix=ASM-STRESS-P8 %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 %s -o - | FileCheck --check-prefix=ASM-P9 %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 %s -o - -stress-cgp-store-extract | FileCheck --check-prefix=ASM-STRESS-P9 %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 %s -o - | FileCheck --check-prefix=ASM-P10 %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 %s -o - -stress-cgp-store-extract | FileCheck --check-prefix=ASM-STRESS-P10 %s + +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 %s -o - | FileCheck --check-prefix=ASM-P9-BE %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 %s -o - -stress-cgp-store-extract | FileCheck --check-prefix=ASM-STRESS-P9-BE %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 %s -o - | FileCheck --check-prefix=ASM-P10-BE %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 %s -o - -stress-cgp-store-extract | FileCheck --check-prefix=ASM-STRESS-P10-BE %s + +define void @chainOfInstructionsToPromote(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: chainOfInstructionsToPromote: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: chainOfInstructionsToPromote: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: vspltisw 2, 1 +; ASM-STRESS-P8-NEXT: 
xxswapd 35, 0 +; ASM-STRESS-P8-NEXT: xxlor 0, 35, 34 +; ASM-STRESS-P8-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: chainOfInstructionsToPromote: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: chainOfInstructionsToPromote: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: vspltisw 2, 1 +; ASM-STRESS-P9-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P9-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: chainOfInstructionsToPromote: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: chainOfInstructionsToPromote: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: vspltisw 2, 1 +; ASM-STRESS-P10-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P10-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out1 = or i32 %extract, 1 + %out2 = or i32 %out1, 1 + store i32 %out2, ptr %dest, align 4 + ret void +} + +define void @chainOfInstructionsToPromoteBE(ptr %addr1, ptr %dest) { +; ASM-P9-BE-LABEL: chainOfInstructionsToPromoteBE: +; ASM-P9-BE: # %bb.0: +; ASM-P9-BE-NEXT: lwz 3, 4(3) +; ASM-P9-BE-NEXT: ori 3, 3, 1 +; ASM-P9-BE-NEXT: stw 3, 0(4) +; ASM-P9-BE-NEXT: blr +; +; ASM-STRESS-P9-BE-LABEL: chainOfInstructionsToPromoteBE: +; ASM-STRESS-P9-BE: # %bb.0: +; ASM-STRESS-P9-BE-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-BE-NEXT: vspltisw 2, 1 +; ASM-STRESS-P9-BE-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P9-BE-NEXT: xxsldwi 0, 0, 0, 1 +; ASM-STRESS-P9-BE-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P9-BE-NEXT: blr +; +; ASM-P10-BE-LABEL: chainOfInstructionsToPromoteBE: +; ASM-P10-BE: # %bb.0: +; ASM-P10-BE-NEXT: lwz 3, 4(3) +; ASM-P10-BE-NEXT: ori 3, 3, 1 +; ASM-P10-BE-NEXT: stw 3, 0(4) +; ASM-P10-BE-NEXT: blr +; +; 
ASM-STRESS-P10-BE-LABEL: chainOfInstructionsToPromoteBE: +; ASM-STRESS-P10-BE: # %bb.0: +; ASM-STRESS-P10-BE-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-BE-NEXT: vspltisw 2, 1 +; ASM-STRESS-P10-BE-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P10-BE-NEXT: xxsldwi 0, 0, 0, 1 +; ASM-STRESS-P10-BE-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-BE-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 1 + %out1 = or i32 %extract, 1 + %out2 = or i32 %out1, 1 + store i32 %out2, ptr %dest, align 4 + ret void +} + +define void @fdivCaseFloat(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: fdivCaseFloat: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: xvcvsxwdp 1, 34 +; ASM-P8-NEXT: xxswapd 34, 0 +; ASM-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-P8-NEXT: xscvspdpn 0, 0 +; ASM-P8-NEXT: xsdivsp 0, 0, 1 +; ASM-P8-NEXT: stfs 0, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: fdivCaseFloat: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addis 5, 2, .LCPI2_0@toc@ha +; ASM-STRESS-P8-NEXT: addi 3, 5, .LCPI2_0@toc@l +; ASM-STRESS-P8-NEXT: xxswapd 34, 0 +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: xvdivsp 0, 34, 0 +; ASM-STRESS-P8-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: fdivCaseFloat: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: lfs 0, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-P9-NEXT: xsdivsp 0, 0, 1 +; ASM-P9-NEXT: stfs 0, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: fdivCaseFloat: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI2_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: xvdivsp 0, 0, 1 +; ASM-STRESS-P9-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: fdivCaseFloat: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lfs 0, 8(3) +; ASM-P10-NEXT: xxspltidp 1, 1088421888 +; 
ASM-P10-NEXT: xsdivsp 0, 0, 1 +; ASM-P10-NEXT: stfs 0, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: fdivCaseFloat: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 1, 1088421888 +; ASM-STRESS-P10-NEXT: xvdivsp 0, 0, 1 +; ASM-STRESS-P10-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x float>, ptr %addr1, align 16 + %extract = extractelement <4 x float> %in1, i32 2 + %out = fdiv float %extract, 7.0 + store float %out, ptr %dest, align 4 + ret void +} + +define void @fdivCaseDouble(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: fdivCaseDouble: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: lfd 1, 8(3) +; ASM-P8-NEXT: xvcvsxwdp 0, 34 +; ASM-P8-NEXT: xsdivdp 0, 1, 0 +; ASM-P8-NEXT: stfd 0, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: fdivCaseDouble: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addis 5, 2, .LCPI3_0@toc@ha +; ASM-STRESS-P8-NEXT: addi 3, 5, .LCPI3_0@toc@l +; ASM-STRESS-P8-NEXT: lxvd2x 1, 0, 3 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: xvdivdp 0, 0, 1 +; ASM-STRESS-P8-NEXT: stfdx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: fdivCaseDouble: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: lfd 0, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-P9-NEXT: xsdivdp 0, 0, 1 +; ASM-P9-NEXT: stfd 0, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: fdivCaseDouble: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI3_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: xvdivdp 0, 0, 1 +; ASM-STRESS-P9-NEXT: stfd 0, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: fdivCaseDouble: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lfd 0, 8(3) +; ASM-P10-NEXT: xxspltidp 1, 1088421888 +; ASM-P10-NEXT: xsdivdp 0, 0, 1 +; ASM-P10-NEXT: stfd 0, 0(4) +; ASM-P10-NEXT: blr +; +; 
ASM-STRESS-P10-LABEL: fdivCaseDouble: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: xxspltidp 1, 1088421888 +; ASM-STRESS-P10-NEXT: xvdivdp 0, 0, 1 +; ASM-STRESS-P10-NEXT: stfd 0, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x double>, ptr %addr1, align 16 + %extract = extractelement <2 x double> %in1, i32 1 + %out = fdiv double %extract, 7.0 + store double %out, ptr %dest, align 8 + ret void +} + +define void @fremCaseFloat(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: fremCaseFloat: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: mflr 0 +; ASM-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-P8-NEXT: .cfi_offset lr, 16 +; ASM-P8-NEXT: .cfi_offset r30, -16 +; ASM-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P8-NEXT: stdu 1, -48(1) +; ASM-P8-NEXT: std 0, 64(1) +; ASM-P8-NEXT: mr 30, 4 +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: xxswapd 34, 0 +; ASM-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: xscvspdpn 1, 0 +; ASM-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P8-NEXT: bl fmodf +; ASM-P8-NEXT: nop +; ASM-P8-NEXT: stfs 1, 0(30) +; ASM-P8-NEXT: addi 1, 1, 48 +; ASM-P8-NEXT: ld 0, 16(1) +; ASM-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P8-NEXT: mtlr 0 +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: fremCaseFloat: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: mflr 0 +; ASM-STRESS-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P8-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P8-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P8-NEXT: stdu 1, -48(1) +; ASM-STRESS-P8-NEXT: std 0, 64(1) +; ASM-STRESS-P8-NEXT: mr 30, 4 +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: xxswapd 34, 0 +; ASM-STRESS-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-STRESS-P8-NEXT: vspltisw 2, 7 +; ASM-STRESS-P8-NEXT: xscvspdpn 1, 0 +; ASM-STRESS-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; 
ASM-STRESS-P8-NEXT: bl fmodf +; ASM-STRESS-P8-NEXT: nop +; ASM-STRESS-P8-NEXT: stfs 1, 0(30) +; ASM-STRESS-P8-NEXT: addi 1, 1, 48 +; ASM-STRESS-P8-NEXT: ld 0, 16(1) +; ASM-STRESS-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P8-NEXT: mtlr 0 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: fremCaseFloat: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: mflr 0 +; ASM-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-P9-NEXT: .cfi_offset lr, 16 +; ASM-P9-NEXT: .cfi_offset r30, -16 +; ASM-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P9-NEXT: stdu 1, -48(1) +; ASM-P9-NEXT: std 0, 64(1) +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: mr 30, 4 +; ASM-P9-NEXT: lfs 1, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P9-NEXT: bl fmodf +; ASM-P9-NEXT: nop +; ASM-P9-NEXT: stfs 1, 0(30) +; ASM-P9-NEXT: addi 1, 1, 48 +; ASM-P9-NEXT: ld 0, 16(1) +; ASM-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P9-NEXT: mtlr 0 +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: fremCaseFloat: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: mflr 0 +; ASM-STRESS-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P9-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P9-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P9-NEXT: stdu 1, -48(1) +; ASM-STRESS-P9-NEXT: std 0, 64(1) +; ASM-STRESS-P9-NEXT: vspltisw 2, 7 +; ASM-STRESS-P9-NEXT: mr 30, 4 +; ASM-STRESS-P9-NEXT: lfs 1, 8(3) +; ASM-STRESS-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-STRESS-P9-NEXT: bl fmodf +; ASM-STRESS-P9-NEXT: nop +; ASM-STRESS-P9-NEXT: stfs 1, 0(30) +; ASM-STRESS-P9-NEXT: addi 1, 1, 48 +; ASM-STRESS-P9-NEXT: ld 0, 16(1) +; ASM-STRESS-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P9-NEXT: mtlr 0 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: fremCaseFloat: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: mflr 0 +; ASM-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-P10-NEXT: .cfi_offset lr, 16 +; 
ASM-P10-NEXT: .cfi_offset r30, -16 +; ASM-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P10-NEXT: std 0, 16(1) +; ASM-P10-NEXT: stdu 1, -48(1) +; ASM-P10-NEXT: lfs 1, 8(3) +; ASM-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-P10-NEXT: mr 30, 4 +; ASM-P10-NEXT: bl fmodf@notoc +; ASM-P10-NEXT: stfs 1, 0(30) +; ASM-P10-NEXT: addi 1, 1, 48 +; ASM-P10-NEXT: ld 0, 16(1) +; ASM-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P10-NEXT: mtlr 0 +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: fremCaseFloat: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: mflr 0 +; ASM-STRESS-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P10-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P10-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P10-NEXT: std 0, 16(1) +; ASM-STRESS-P10-NEXT: stdu 1, -48(1) +; ASM-STRESS-P10-NEXT: lfs 1, 8(3) +; ASM-STRESS-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-STRESS-P10-NEXT: mr 30, 4 +; ASM-STRESS-P10-NEXT: bl fmodf@notoc +; ASM-STRESS-P10-NEXT: stfs 1, 0(30) +; ASM-STRESS-P10-NEXT: addi 1, 1, 48 +; ASM-STRESS-P10-NEXT: ld 0, 16(1) +; ASM-STRESS-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P10-NEXT: mtlr 0 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x float>, ptr %addr1, align 16 + %extract = extractelement <4 x float> %in1, i32 2 + %out = frem float %extract, 7.0 + store float %out, ptr %dest, align 4 + ret void +} + +define void @fremCaseDouble(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: fremCaseDouble: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: mflr 0 +; ASM-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-P8-NEXT: .cfi_offset lr, 16 +; ASM-P8-NEXT: .cfi_offset r30, -16 +; ASM-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P8-NEXT: stdu 1, -48(1) +; ASM-P8-NEXT: std 0, 64(1) +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: mr 30, 4 +; ASM-P8-NEXT: lfd 1, 8(3) +; ASM-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P8-NEXT: bl fmod +; ASM-P8-NEXT: nop +; 
ASM-P8-NEXT: stfd 1, 0(30) +; ASM-P8-NEXT: addi 1, 1, 48 +; ASM-P8-NEXT: ld 0, 16(1) +; ASM-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P8-NEXT: mtlr 0 +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: fremCaseDouble: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: mflr 0 +; ASM-STRESS-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P8-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P8-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P8-NEXT: stdu 1, -48(1) +; ASM-STRESS-P8-NEXT: std 0, 64(1) +; ASM-STRESS-P8-NEXT: vspltisw 2, 7 +; ASM-STRESS-P8-NEXT: mr 30, 4 +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P8-NEXT: xxswapd 1, 0 +; ASM-STRESS-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-STRESS-P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; ASM-STRESS-P8-NEXT: bl fmod +; ASM-STRESS-P8-NEXT: nop +; ASM-STRESS-P8-NEXT: stfd 1, 0(30) +; ASM-STRESS-P8-NEXT: addi 1, 1, 48 +; ASM-STRESS-P8-NEXT: ld 0, 16(1) +; ASM-STRESS-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P8-NEXT: mtlr 0 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: fremCaseDouble: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: mflr 0 +; ASM-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-P9-NEXT: .cfi_offset lr, 16 +; ASM-P9-NEXT: .cfi_offset r30, -16 +; ASM-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P9-NEXT: stdu 1, -48(1) +; ASM-P9-NEXT: std 0, 64(1) +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: mr 30, 4 +; ASM-P9-NEXT: lfd 1, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P9-NEXT: bl fmod +; ASM-P9-NEXT: nop +; ASM-P9-NEXT: stfd 1, 0(30) +; ASM-P9-NEXT: addi 1, 1, 48 +; ASM-P9-NEXT: ld 0, 16(1) +; ASM-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P9-NEXT: mtlr 0 +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: fremCaseDouble: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: mflr 0 +; ASM-STRESS-P9-NEXT: .cfi_def_cfa_offset 48 +; 
ASM-STRESS-P9-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P9-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P9-NEXT: stdu 1, -48(1) +; ASM-STRESS-P9-NEXT: std 0, 64(1) +; ASM-STRESS-P9-NEXT: vspltisw 2, 7 +; ASM-STRESS-P9-NEXT: mr 30, 4 +; ASM-STRESS-P9-NEXT: lfd 1, 8(3) +; ASM-STRESS-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-STRESS-P9-NEXT: bl fmod +; ASM-STRESS-P9-NEXT: nop +; ASM-STRESS-P9-NEXT: stfd 1, 0(30) +; ASM-STRESS-P9-NEXT: addi 1, 1, 48 +; ASM-STRESS-P9-NEXT: ld 0, 16(1) +; ASM-STRESS-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P9-NEXT: mtlr 0 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: fremCaseDouble: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: mflr 0 +; ASM-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-P10-NEXT: .cfi_offset lr, 16 +; ASM-P10-NEXT: .cfi_offset r30, -16 +; ASM-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P10-NEXT: std 0, 16(1) +; ASM-P10-NEXT: stdu 1, -48(1) +; ASM-P10-NEXT: lfd 1, 8(3) +; ASM-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-P10-NEXT: mr 30, 4 +; ASM-P10-NEXT: bl fmod@notoc +; ASM-P10-NEXT: stfd 1, 0(30) +; ASM-P10-NEXT: addi 1, 1, 48 +; ASM-P10-NEXT: ld 0, 16(1) +; ASM-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P10-NEXT: mtlr 0 +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: fremCaseDouble: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: mflr 0 +; ASM-STRESS-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P10-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P10-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P10-NEXT: std 0, 16(1) +; ASM-STRESS-P10-NEXT: stdu 1, -48(1) +; ASM-STRESS-P10-NEXT: lfd 1, 8(3) +; ASM-STRESS-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-STRESS-P10-NEXT: mr 30, 4 +; ASM-STRESS-P10-NEXT: bl fmod@notoc +; ASM-STRESS-P10-NEXT: stfd 1, 0(30) +; ASM-STRESS-P10-NEXT: addi 1, 1, 48 +; ASM-STRESS-P10-NEXT: ld 0, 16(1) +; 
ASM-STRESS-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P10-NEXT: mtlr 0 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x double>, ptr %addr1, align 16 + %extract = extractelement <2 x double> %in1, i32 1 + %out = frem double %extract, 7.0 + store double %out, ptr %dest, align 8 + ret void +} + +define void @sdivCase32(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: sdivCase32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lis 5, -28087 +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 5, 5, 9363 +; ASM-P8-NEXT: mulhw 5, 3, 5 +; ASM-P8-NEXT: add 3, 5, 3 +; ASM-P8-NEXT: srwi 5, 3, 31 +; ASM-P8-NEXT: srawi 3, 3, 2 +; ASM-P8-NEXT: add 3, 3, 5 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: sdivCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, -28087 +; ASM-STRESS-P8-NEXT: ori 5, 5, 9363 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprwz 3, 0 +; ASM-STRESS-P8-NEXT: mulhw 5, 3, 5 +; ASM-STRESS-P8-NEXT: add 3, 5, 3 +; ASM-STRESS-P8-NEXT: srwi 5, 3, 31 +; ASM-STRESS-P8-NEXT: srawi 3, 3, 2 +; ASM-STRESS-P8-NEXT: add 3, 3, 5 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: sdivCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: lis 5, -28087 +; ASM-P9-NEXT: ori 5, 5, 9363 +; ASM-P9-NEXT: mulhw 5, 3, 5 +; ASM-P9-NEXT: add 3, 5, 3 +; ASM-P9-NEXT: srwi 5, 3, 31 +; ASM-P9-NEXT: srawi 3, 3, 2 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: sdivCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: lis 5, -28087 +; ASM-STRESS-P9-NEXT: ori 5, 5, 9363 +; ASM-STRESS-P9-NEXT: mulhw 5, 3, 5 +; ASM-STRESS-P9-NEXT: add 3, 5, 3 +; ASM-STRESS-P9-NEXT: srwi 5, 3, 31 +; ASM-STRESS-P9-NEXT: srawi 3, 3, 2 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: sdivCase32: +; ASM-P10: # %bb.0: +; 
ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: pli 5, -1840700269 +; ASM-P10-NEXT: mulhw 5, 3, 5 +; ASM-P10-NEXT: add 3, 5, 3 +; ASM-P10-NEXT: srwi 5, 3, 31 +; ASM-P10-NEXT: srawi 3, 3, 2 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: sdivCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 35, -1840700269 +; ASM-STRESS-P10-NEXT: vspltisw 4, 2 +; ASM-STRESS-P10-NEXT: vmulhsw 3, 2, 3 +; ASM-STRESS-P10-NEXT: vadduwm 2, 3, 2 +; ASM-STRESS-P10-NEXT: xxspltiw 35, 31 +; ASM-STRESS-P10-NEXT: vsrw 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsraw 2, 2, 4 +; ASM-STRESS-P10-NEXT: vadduwm 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsiwx 34, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = sdiv i32 %extract, 7 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @sdivCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: sdivCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: lis 5, 18724 +; ASM-P8-NEXT: ori 3, 5, 37449 +; ASM-P8-NEXT: rldic 3, 3, 32, 1 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: oris 3, 3, 9362 +; ASM-P8-NEXT: ori 3, 3, 18725 +; ASM-P8-NEXT: mffprd 5, 0 +; ASM-P8-NEXT: mulhd 3, 5, 3 +; ASM-P8-NEXT: rldicl 5, 3, 1, 63 +; ASM-P8-NEXT: sradi 3, 3, 1 +; ASM-P8-NEXT: add 3, 3, 5 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: sdivCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 18724 +; ASM-STRESS-P8-NEXT: ori 3, 5, 37449 +; ASM-STRESS-P8-NEXT: rldic 3, 3, 32, 1 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: oris 3, 3, 9362 +; ASM-STRESS-P8-NEXT: ori 3, 3, 18725 +; ASM-STRESS-P8-NEXT: mffprd 5, 0 +; ASM-STRESS-P8-NEXT: mulhd 3, 5, 3 +; ASM-STRESS-P8-NEXT: rldicl 5, 3, 1, 63 +; ASM-STRESS-P8-NEXT: sradi 3, 3, 1 +; ASM-STRESS-P8-NEXT: add 3, 3, 5 +; ASM-STRESS-P8-NEXT: 
std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: sdivCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lis 5, 18724 +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: ori 5, 5, 37449 +; ASM-P9-NEXT: rldic 5, 5, 32, 1 +; ASM-P9-NEXT: oris 5, 5, 9362 +; ASM-P9-NEXT: ori 5, 5, 18725 +; ASM-P9-NEXT: mulhd 3, 3, 5 +; ASM-P9-NEXT: rldicl 5, 3, 1, 63 +; ASM-P9-NEXT: sradi 3, 3, 1 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: sdivCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lis 5, 18724 +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 5, 5, 37449 +; ASM-STRESS-P9-NEXT: rldic 5, 5, 32, 1 +; ASM-STRESS-P9-NEXT: oris 5, 5, 9362 +; ASM-STRESS-P9-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P9-NEXT: mulhd 3, 3, 5 +; ASM-STRESS-P9-NEXT: rldicl 5, 3, 1, 63 +; ASM-STRESS-P9-NEXT: sradi 3, 3, 1 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: sdivCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: pli 5, 1227133513 +; ASM-P10-NEXT: pli 6, 613566757 +; ASM-P10-NEXT: rldimi 6, 5, 32, 0 +; ASM-P10-NEXT: mulhd 3, 3, 6 +; ASM-P10-NEXT: rldicl 5, 3, 1, 63 +; ASM-P10-NEXT: sradi 3, 3, 1 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: sdivCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 0, 1227133513 +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxlxor 36, 36, 36 +; ASM-STRESS-P10-NEXT: xxsplti32dx 36, 1, 63 +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 1, 613566757 +; ASM-STRESS-P10-NEXT: vmulhsd 2, 2, 3 +; ASM-STRESS-P10-NEXT: xxlxor 35, 35, 35 +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 1, 1 +; ASM-STRESS-P10-NEXT: vsrd 4, 2, 4 +; ASM-STRESS-P10-NEXT: vsrad 2, 2, 3 +; ASM-STRESS-P10-NEXT: vaddudm 2, 2, 4 +; ASM-STRESS-P10-NEXT: stxsd 2, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = 
extractelement <2 x i64> %in1, i32 1 + %out = sdiv i64 %extract, 7 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @sremCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: sremCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: lis 5, 18724 +; ASM-P8-NEXT: ori 3, 5, 37449 +; ASM-P8-NEXT: rldic 3, 3, 32, 1 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: oris 3, 3, 9362 +; ASM-P8-NEXT: ori 3, 3, 18725 +; ASM-P8-NEXT: mffprd 5, 0 +; ASM-P8-NEXT: mulhd 3, 5, 3 +; ASM-P8-NEXT: rldicl 6, 3, 1, 63 +; ASM-P8-NEXT: sradi 3, 3, 1 +; ASM-P8-NEXT: add 3, 3, 6 +; ASM-P8-NEXT: sldi 6, 3, 3 +; ASM-P8-NEXT: sub 3, 3, 6 +; ASM-P8-NEXT: add 3, 5, 3 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: sremCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 18724 +; ASM-STRESS-P8-NEXT: ori 3, 5, 37449 +; ASM-STRESS-P8-NEXT: rldic 3, 3, 32, 1 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: oris 3, 3, 9362 +; ASM-STRESS-P8-NEXT: ori 3, 3, 18725 +; ASM-STRESS-P8-NEXT: mffprd 5, 0 +; ASM-STRESS-P8-NEXT: mulhd 3, 5, 3 +; ASM-STRESS-P8-NEXT: rldicl 6, 3, 1, 63 +; ASM-STRESS-P8-NEXT: sradi 3, 3, 1 +; ASM-STRESS-P8-NEXT: add 3, 3, 6 +; ASM-STRESS-P8-NEXT: sldi 6, 3, 3 +; ASM-STRESS-P8-NEXT: sub 3, 3, 6 +; ASM-STRESS-P8-NEXT: add 3, 5, 3 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: sremCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lis 5, 18724 +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: ori 5, 5, 37449 +; ASM-P9-NEXT: rldic 5, 5, 32, 1 +; ASM-P9-NEXT: oris 5, 5, 9362 +; ASM-P9-NEXT: ori 5, 5, 18725 +; ASM-P9-NEXT: mulhd 5, 3, 5 +; ASM-P9-NEXT: rldicl 6, 5, 1, 63 +; ASM-P9-NEXT: sradi 5, 5, 1 +; ASM-P9-NEXT: add 5, 5, 6 +; ASM-P9-NEXT: sldi 6, 5, 3 +; ASM-P9-NEXT: sub 5, 5, 6 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: sremCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lis 5, 
18724 +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 5, 5, 37449 +; ASM-STRESS-P9-NEXT: rldic 5, 5, 32, 1 +; ASM-STRESS-P9-NEXT: oris 5, 5, 9362 +; ASM-STRESS-P9-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P9-NEXT: mulhd 5, 3, 5 +; ASM-STRESS-P9-NEXT: rldicl 6, 5, 1, 63 +; ASM-STRESS-P9-NEXT: sradi 5, 5, 1 +; ASM-STRESS-P9-NEXT: add 5, 5, 6 +; ASM-STRESS-P9-NEXT: sldi 6, 5, 3 +; ASM-STRESS-P9-NEXT: sub 5, 5, 6 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: sremCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: pli 5, 1227133513 +; ASM-P10-NEXT: pli 6, 613566757 +; ASM-P10-NEXT: rldimi 6, 5, 32, 0 +; ASM-P10-NEXT: mulhd 5, 3, 6 +; ASM-P10-NEXT: rldicl 6, 5, 1, 63 +; ASM-P10-NEXT: sradi 5, 5, 1 +; ASM-P10-NEXT: add 5, 5, 6 +; ASM-P10-NEXT: sldi 6, 5, 3 +; ASM-P10-NEXT: sub 5, 5, 6 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: sremCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 0, 1227133513 +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxlxor 37, 37, 37 +; ASM-STRESS-P10-NEXT: xxlxor 32, 32, 32 +; ASM-STRESS-P10-NEXT: xxlxor 36, 36, 36 +; ASM-STRESS-P10-NEXT: xxsplti32dx 37, 1, 63 +; ASM-STRESS-P10-NEXT: xxsplti32dx 32, 1, 1 +; ASM-STRESS-P10-NEXT: xxsplti32dx 36, 1, 3 +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 1, 613566757 +; ASM-STRESS-P10-NEXT: vmulhsd 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsrd 5, 3, 5 +; ASM-STRESS-P10-NEXT: vsrad 3, 3, 0 +; ASM-STRESS-P10-NEXT: vaddudm 3, 3, 5 +; ASM-STRESS-P10-NEXT: vsld 4, 3, 4 +; ASM-STRESS-P10-NEXT: vsubudm 3, 3, 4 +; ASM-STRESS-P10-NEXT: vaddudm 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsd 2, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = srem i64 %extract, 7 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @sremCase32(ptr %addr1, 
ptr %dest) { +; ASM-P8-LABEL: sremCase32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lis 5, -28087 +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 5, 5, 9363 +; ASM-P8-NEXT: mulhw 5, 3, 5 +; ASM-P8-NEXT: add 5, 5, 3 +; ASM-P8-NEXT: srwi 6, 5, 31 +; ASM-P8-NEXT: srawi 5, 5, 2 +; ASM-P8-NEXT: add 5, 5, 6 +; ASM-P8-NEXT: slwi 6, 5, 3 +; ASM-P8-NEXT: sub 5, 5, 6 +; ASM-P8-NEXT: add 3, 3, 5 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: sremCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, -28087 +; ASM-STRESS-P8-NEXT: ori 5, 5, 9363 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprwz 3, 0 +; ASM-STRESS-P8-NEXT: mulhw 5, 3, 5 +; ASM-STRESS-P8-NEXT: add 5, 5, 3 +; ASM-STRESS-P8-NEXT: srwi 6, 5, 31 +; ASM-STRESS-P8-NEXT: srawi 5, 5, 2 +; ASM-STRESS-P8-NEXT: add 5, 5, 6 +; ASM-STRESS-P8-NEXT: slwi 6, 5, 3 +; ASM-STRESS-P8-NEXT: sub 5, 5, 6 +; ASM-STRESS-P8-NEXT: add 3, 3, 5 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: sremCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: lis 5, -28087 +; ASM-P9-NEXT: ori 5, 5, 9363 +; ASM-P9-NEXT: mulhw 5, 3, 5 +; ASM-P9-NEXT: add 5, 5, 3 +; ASM-P9-NEXT: srwi 6, 5, 31 +; ASM-P9-NEXT: srawi 5, 5, 2 +; ASM-P9-NEXT: add 5, 5, 6 +; ASM-P9-NEXT: slwi 6, 5, 3 +; ASM-P9-NEXT: sub 5, 5, 6 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: sremCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: lis 5, -28087 +; ASM-STRESS-P9-NEXT: ori 5, 5, 9363 +; ASM-STRESS-P9-NEXT: mulhw 5, 3, 5 +; ASM-STRESS-P9-NEXT: add 5, 5, 3 +; ASM-STRESS-P9-NEXT: srwi 6, 5, 31 +; ASM-STRESS-P9-NEXT: srawi 5, 5, 2 +; ASM-STRESS-P9-NEXT: add 5, 5, 6 +; ASM-STRESS-P9-NEXT: slwi 6, 5, 3 +; ASM-STRESS-P9-NEXT: sub 5, 5, 6 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: 
sremCase32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: pli 5, -1840700269 +; ASM-P10-NEXT: mulhw 5, 3, 5 +; ASM-P10-NEXT: add 5, 5, 3 +; ASM-P10-NEXT: srwi 6, 5, 31 +; ASM-P10-NEXT: srawi 5, 5, 2 +; ASM-P10-NEXT: add 5, 5, 6 +; ASM-P10-NEXT: slwi 6, 5, 3 +; ASM-P10-NEXT: sub 5, 5, 6 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: sremCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 35, -1840700269 +; ASM-STRESS-P10-NEXT: xxspltiw 36, 31 +; ASM-STRESS-P10-NEXT: vspltisw 5, 2 +; ASM-STRESS-P10-NEXT: vmulhsw 3, 2, 3 +; ASM-STRESS-P10-NEXT: vadduwm 3, 3, 2 +; ASM-STRESS-P10-NEXT: vsrw 4, 3, 4 +; ASM-STRESS-P10-NEXT: vsraw 3, 3, 5 +; ASM-STRESS-P10-NEXT: vadduwm 3, 3, 4 +; ASM-STRESS-P10-NEXT: vspltisw 4, 3 +; ASM-STRESS-P10-NEXT: vslw 4, 3, 4 +; ASM-STRESS-P10-NEXT: vsubuwm 3, 3, 4 +; ASM-STRESS-P10-NEXT: vadduwm 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsiwx 34, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = srem i32 %extract, 7 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @udivCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: udivCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: lis 5, 2340 +; ASM-P8-NEXT: ori 3, 5, 37449 +; ASM-P8-NEXT: rldic 3, 3, 34, 2 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: oris 3, 3, 37449 +; ASM-P8-NEXT: ori 3, 3, 9363 +; ASM-P8-NEXT: mffprd 5, 0 +; ASM-P8-NEXT: mulhdu 3, 5, 3 +; ASM-P8-NEXT: sub 5, 5, 3 +; ASM-P8-NEXT: rldicl 5, 5, 63, 1 +; ASM-P8-NEXT: add 3, 5, 3 +; ASM-P8-NEXT: rldicl 3, 3, 62, 2 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: udivCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 2340 +; ASM-STRESS-P8-NEXT: ori 3, 5, 37449 +; ASM-STRESS-P8-NEXT: rldic 3, 3, 34, 2 +; 
ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: oris 3, 3, 37449 +; ASM-STRESS-P8-NEXT: ori 3, 3, 9363 +; ASM-STRESS-P8-NEXT: mffprd 5, 0 +; ASM-STRESS-P8-NEXT: mulhdu 3, 5, 3 +; ASM-STRESS-P8-NEXT: sub 5, 5, 3 +; ASM-STRESS-P8-NEXT: rldicl 5, 5, 63, 1 +; ASM-STRESS-P8-NEXT: add 3, 5, 3 +; ASM-STRESS-P8-NEXT: rldicl 3, 3, 62, 2 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: udivCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lis 5, 2340 +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: ori 5, 5, 37449 +; ASM-P9-NEXT: rldic 5, 5, 34, 2 +; ASM-P9-NEXT: oris 5, 5, 37449 +; ASM-P9-NEXT: ori 5, 5, 9363 +; ASM-P9-NEXT: mulhdu 5, 3, 5 +; ASM-P9-NEXT: sub 3, 3, 5 +; ASM-P9-NEXT: rldicl 3, 3, 63, 1 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: rldicl 3, 3, 62, 2 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: udivCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lis 5, 2340 +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 5, 5, 37449 +; ASM-STRESS-P9-NEXT: rldic 5, 5, 34, 2 +; ASM-STRESS-P9-NEXT: oris 5, 5, 37449 +; ASM-STRESS-P9-NEXT: ori 5, 5, 9363 +; ASM-STRESS-P9-NEXT: mulhdu 5, 3, 5 +; ASM-STRESS-P9-NEXT: sub 3, 3, 5 +; ASM-STRESS-P9-NEXT: rldicl 3, 3, 63, 1 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: rldicl 3, 3, 62, 2 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: udivCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: pli 5, 613566756 +; ASM-P10-NEXT: pli 6, 2454267027 +; ASM-P10-NEXT: rldimi 6, 5, 32, 0 +; ASM-P10-NEXT: mulhdu 5, 3, 6 +; ASM-P10-NEXT: sub 3, 3, 5 +; ASM-P10-NEXT: rldicl 3, 3, 63, 1 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: rldicl 3, 3, 62, 2 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: udivCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 0, 613566756 +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxlxor 37, 37, 37 +; 
ASM-STRESS-P10-NEXT: xxlxor 36, 36, 36 +; ASM-STRESS-P10-NEXT: xxsplti32dx 37, 1, 1 +; ASM-STRESS-P10-NEXT: xxsplti32dx 36, 1, 2 +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 1, -1840700269 +; ASM-STRESS-P10-NEXT: vmulhud 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsubudm 2, 2, 3 +; ASM-STRESS-P10-NEXT: vsrd 2, 2, 5 +; ASM-STRESS-P10-NEXT: vaddudm 2, 2, 3 +; ASM-STRESS-P10-NEXT: vsrd 2, 2, 4 +; ASM-STRESS-P10-NEXT: stxsd 2, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = udiv i64 %extract, 7 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @udivCase32(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: udivCase32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lis 5, 9362 +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 5, 5, 18725 +; ASM-P8-NEXT: mulhwu 5, 3, 5 +; ASM-P8-NEXT: sub 3, 3, 5 +; ASM-P8-NEXT: srwi 3, 3, 1 +; ASM-P8-NEXT: add 3, 3, 5 +; ASM-P8-NEXT: srwi 3, 3, 2 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: udivCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 9362 +; ASM-STRESS-P8-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprwz 3, 0 +; ASM-STRESS-P8-NEXT: mulhwu 5, 3, 5 +; ASM-STRESS-P8-NEXT: sub 3, 3, 5 +; ASM-STRESS-P8-NEXT: srwi 3, 3, 1 +; ASM-STRESS-P8-NEXT: add 3, 3, 5 +; ASM-STRESS-P8-NEXT: srwi 3, 3, 2 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: udivCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: lis 5, 9362 +; ASM-P9-NEXT: ori 5, 5, 18725 +; ASM-P9-NEXT: mulhwu 5, 3, 5 +; ASM-P9-NEXT: sub 3, 3, 5 +; ASM-P9-NEXT: srwi 3, 3, 1 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: srwi 3, 3, 2 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: udivCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: lis 5, 9362 +; ASM-STRESS-P9-NEXT: ori 5, 5, 
18725 +; ASM-STRESS-P9-NEXT: mulhwu 5, 3, 5 +; ASM-STRESS-P9-NEXT: sub 3, 3, 5 +; ASM-STRESS-P9-NEXT: srwi 3, 3, 1 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: srwi 3, 3, 2 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: udivCase32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: pli 5, 613566757 +; ASM-P10-NEXT: mulhwu 5, 3, 5 +; ASM-P10-NEXT: sub 3, 3, 5 +; ASM-P10-NEXT: srwi 3, 3, 1 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: srwi 3, 3, 2 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: udivCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 35, 613566757 +; ASM-STRESS-P10-NEXT: vspltisw 4, 1 +; ASM-STRESS-P10-NEXT: vmulhuw 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsubuwm 2, 2, 3 +; ASM-STRESS-P10-NEXT: vsrw 2, 2, 4 +; ASM-STRESS-P10-NEXT: vadduwm 2, 2, 3 +; ASM-STRESS-P10-NEXT: vspltisw 3, 2 +; ASM-STRESS-P10-NEXT: vsrw 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsiwx 34, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = udiv i32 %extract, 7 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @undefDivCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefDivCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: li 5, 7 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: mffprd 3, 0 +; ASM-P8-NEXT: divdu 3, 5, 3 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefDivCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: li 5, 7 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprd 3, 0 +; ASM-STRESS-P8-NEXT: divdu 3, 5, 3 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefDivCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: li 5, 7 +; ASM-P9-NEXT: divdu 3, 5, 3 +; ASM-P9-NEXT: std 
3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefDivCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: li 5, 7 +; ASM-STRESS-P9-NEXT: divdu 3, 5, 3 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefDivCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: li 5, 7 +; ASM-P10-NEXT: divdu 3, 5, 3 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: undefDivCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: ld 3, 8(3) +; ASM-STRESS-P10-NEXT: li 5, 7 +; ASM-STRESS-P10-NEXT: divdu 3, 5, 3 +; ASM-STRESS-P10-NEXT: std 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = udiv i64 7, %extract + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @undefDivCase32(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefDivCase32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: li 5, 7 +; ASM-P8-NEXT: divwu 3, 5, 3 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefDivCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lwz 3, 8(3) +; ASM-STRESS-P8-NEXT: li 5, 7 +; ASM-STRESS-P8-NEXT: divwu 3, 5, 3 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefDivCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: li 5, 7 +; ASM-P9-NEXT: divwu 3, 5, 3 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefDivCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: li 5, 7 +; ASM-STRESS-P9-NEXT: divwu 3, 5, 3 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefDivCase32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: li 5, 7 +; ASM-P10-NEXT: divwu 3, 5, 3 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: 
undefDivCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lwz 3, 8(3) +; ASM-STRESS-P10-NEXT: li 5, 7 +; ASM-STRESS-P10-NEXT: divwu 3, 5, 3 +; ASM-STRESS-P10-NEXT: stw 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = udiv i32 7, %extract + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @undefRemCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefRemCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: li 5, 7 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: mffprd 3, 0 +; ASM-P8-NEXT: divd 5, 5, 3 +; ASM-P8-NEXT: mulld 3, 5, 3 +; ASM-P8-NEXT: subfic 3, 3, 7 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefRemCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: li 5, 7 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprd 3, 0 +; ASM-STRESS-P8-NEXT: divd 5, 5, 3 +; ASM-STRESS-P8-NEXT: mulld 3, 5, 3 +; ASM-STRESS-P8-NEXT: subfic 3, 3, 7 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefRemCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: li 5, 7 +; ASM-P9-NEXT: modsd 3, 5, 3 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefRemCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: li 5, 7 +; ASM-STRESS-P9-NEXT: modsd 3, 5, 3 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefRemCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: li 5, 7 +; ASM-P10-NEXT: modsd 3, 5, 3 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: undefRemCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: ld 3, 8(3) +; ASM-STRESS-P10-NEXT: li 5, 7 +; ASM-STRESS-P10-NEXT: modsd 3, 5, 3 +; ASM-STRESS-P10-NEXT: std 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load 
<2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = srem i64 7, %extract + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @undefRemCase32(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefRemCase32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: li 5, 7 +; ASM-P8-NEXT: divw 5, 5, 3 +; ASM-P8-NEXT: mullw 3, 5, 3 +; ASM-P8-NEXT: subfic 3, 3, 7 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefRemCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lwz 3, 8(3) +; ASM-STRESS-P8-NEXT: li 5, 7 +; ASM-STRESS-P8-NEXT: divw 5, 5, 3 +; ASM-STRESS-P8-NEXT: mullw 3, 5, 3 +; ASM-STRESS-P8-NEXT: subfic 3, 3, 7 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefRemCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: li 5, 7 +; ASM-P9-NEXT: modsw 3, 5, 3 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefRemCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: li 5, 7 +; ASM-STRESS-P9-NEXT: modsw 3, 5, 3 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefRemCase32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: li 5, 7 +; ASM-P10-NEXT: modsw 3, 5, 3 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: undefRemCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lwz 3, 8(3) +; ASM-STRESS-P10-NEXT: li 5, 7 +; ASM-STRESS-P10-NEXT: modsw 3, 5, 3 +; ASM-STRESS-P10-NEXT: stw 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = srem i32 7, %extract + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @uremCase64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: uremCase64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: lis 5, 2340 +; ASM-P8-NEXT: ori 3, 5, 37449 +; 
ASM-P8-NEXT: rldic 3, 3, 34, 2 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: oris 3, 3, 37449 +; ASM-P8-NEXT: ori 3, 3, 9363 +; ASM-P8-NEXT: mffprd 5, 0 +; ASM-P8-NEXT: mulhdu 3, 5, 3 +; ASM-P8-NEXT: sub 6, 5, 3 +; ASM-P8-NEXT: rldicl 6, 6, 63, 1 +; ASM-P8-NEXT: add 3, 6, 3 +; ASM-P8-NEXT: rldicl 6, 3, 62, 2 +; ASM-P8-NEXT: rldicr 3, 3, 1, 60 +; ASM-P8-NEXT: sub 3, 6, 3 +; ASM-P8-NEXT: add 3, 5, 3 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: uremCase64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 2340 +; ASM-STRESS-P8-NEXT: ori 3, 5, 37449 +; ASM-STRESS-P8-NEXT: rldic 3, 3, 34, 2 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: oris 3, 3, 37449 +; ASM-STRESS-P8-NEXT: ori 3, 3, 9363 +; ASM-STRESS-P8-NEXT: mffprd 5, 0 +; ASM-STRESS-P8-NEXT: mulhdu 3, 5, 3 +; ASM-STRESS-P8-NEXT: sub 6, 5, 3 +; ASM-STRESS-P8-NEXT: rldicl 6, 6, 63, 1 +; ASM-STRESS-P8-NEXT: add 3, 6, 3 +; ASM-STRESS-P8-NEXT: rldicl 6, 3, 62, 2 +; ASM-STRESS-P8-NEXT: rldicr 3, 3, 1, 60 +; ASM-STRESS-P8-NEXT: sub 3, 6, 3 +; ASM-STRESS-P8-NEXT: add 3, 5, 3 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: uremCase64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lis 5, 2340 +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: ori 5, 5, 37449 +; ASM-P9-NEXT: rldic 5, 5, 34, 2 +; ASM-P9-NEXT: oris 5, 5, 37449 +; ASM-P9-NEXT: ori 5, 5, 9363 +; ASM-P9-NEXT: mulhdu 5, 3, 5 +; ASM-P9-NEXT: sub 6, 3, 5 +; ASM-P9-NEXT: rldicl 6, 6, 63, 1 +; ASM-P9-NEXT: add 5, 6, 5 +; ASM-P9-NEXT: rldicl 6, 5, 62, 2 +; ASM-P9-NEXT: rldicr 5, 5, 1, 60 +; ASM-P9-NEXT: sub 5, 6, 5 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: uremCase64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lis 5, 2340 +; ASM-STRESS-P9-NEXT: ld 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 5, 5, 37449 +; ASM-STRESS-P9-NEXT: rldic 5, 5, 34, 2 +; ASM-STRESS-P9-NEXT: oris 5, 5, 37449 +; ASM-STRESS-P9-NEXT: ori 5, 
5, 9363 +; ASM-STRESS-P9-NEXT: mulhdu 5, 3, 5 +; ASM-STRESS-P9-NEXT: sub 6, 3, 5 +; ASM-STRESS-P9-NEXT: rldicl 6, 6, 63, 1 +; ASM-STRESS-P9-NEXT: add 5, 6, 5 +; ASM-STRESS-P9-NEXT: rldicl 6, 5, 62, 2 +; ASM-STRESS-P9-NEXT: rldicr 5, 5, 1, 60 +; ASM-STRESS-P9-NEXT: sub 5, 6, 5 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: uremCase64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: pli 5, 613566756 +; ASM-P10-NEXT: pli 6, 2454267027 +; ASM-P10-NEXT: rldimi 6, 5, 32, 0 +; ASM-P10-NEXT: mulhdu 5, 3, 6 +; ASM-P10-NEXT: sub 6, 3, 5 +; ASM-P10-NEXT: rldicl 6, 6, 63, 1 +; ASM-P10-NEXT: add 5, 6, 5 +; ASM-P10-NEXT: rldicl 6, 5, 62, 2 +; ASM-P10-NEXT: rldicr 5, 5, 1, 60 +; ASM-P10-NEXT: sub 5, 6, 5 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: uremCase64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 0, 613566756 +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxlxor 32, 32, 32 +; ASM-STRESS-P10-NEXT: xxlxor 37, 37, 37 +; ASM-STRESS-P10-NEXT: xxsplti32dx 32, 1, 1 +; ASM-STRESS-P10-NEXT: xxsplti32dx 37, 1, 3 +; ASM-STRESS-P10-NEXT: xxsplti32dx 35, 1, -1840700269 +; ASM-STRESS-P10-NEXT: vmulhud 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsubudm 4, 2, 3 +; ASM-STRESS-P10-NEXT: vsrd 4, 4, 0 +; ASM-STRESS-P10-NEXT: vaddudm 3, 4, 3 +; ASM-STRESS-P10-NEXT: xxlxor 36, 36, 36 +; ASM-STRESS-P10-NEXT: xxsplti32dx 36, 1, 2 +; ASM-STRESS-P10-NEXT: vsrd 3, 3, 4 +; ASM-STRESS-P10-NEXT: vsld 4, 3, 5 +; ASM-STRESS-P10-NEXT: vsubudm 3, 3, 4 +; ASM-STRESS-P10-NEXT: vaddudm 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsd 2, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = urem i64 %extract, 7 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @uremCase32(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: uremCase32: +; ASM-P8: # %bb.0: +; 
ASM-P8-NEXT: lis 5, 9362 +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 5, 5, 18725 +; ASM-P8-NEXT: mulhwu 5, 3, 5 +; ASM-P8-NEXT: sub 6, 3, 5 +; ASM-P8-NEXT: srwi 6, 6, 1 +; ASM-P8-NEXT: add 5, 6, 5 +; ASM-P8-NEXT: srwi 6, 5, 2 +; ASM-P8-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-P8-NEXT: sub 5, 6, 5 +; ASM-P8-NEXT: add 3, 3, 5 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: uremCase32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: lis 5, 9362 +; ASM-STRESS-P8-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: mffprwz 3, 0 +; ASM-STRESS-P8-NEXT: mulhwu 5, 3, 5 +; ASM-STRESS-P8-NEXT: sub 6, 3, 5 +; ASM-STRESS-P8-NEXT: srwi 6, 6, 1 +; ASM-STRESS-P8-NEXT: add 5, 6, 5 +; ASM-STRESS-P8-NEXT: srwi 6, 5, 2 +; ASM-STRESS-P8-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-STRESS-P8-NEXT: sub 5, 6, 5 +; ASM-STRESS-P8-NEXT: add 3, 3, 5 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: uremCase32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: lis 5, 9362 +; ASM-P9-NEXT: ori 5, 5, 18725 +; ASM-P9-NEXT: mulhwu 5, 3, 5 +; ASM-P9-NEXT: sub 6, 3, 5 +; ASM-P9-NEXT: srwi 6, 6, 1 +; ASM-P9-NEXT: add 5, 6, 5 +; ASM-P9-NEXT: srwi 6, 5, 2 +; ASM-P9-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-P9-NEXT: sub 5, 6, 5 +; ASM-P9-NEXT: add 3, 3, 5 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: uremCase32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: lis 5, 9362 +; ASM-STRESS-P9-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P9-NEXT: mulhwu 5, 3, 5 +; ASM-STRESS-P9-NEXT: sub 6, 3, 5 +; ASM-STRESS-P9-NEXT: srwi 6, 6, 1 +; ASM-STRESS-P9-NEXT: add 5, 6, 5 +; ASM-STRESS-P9-NEXT: srwi 6, 5, 2 +; ASM-STRESS-P9-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-STRESS-P9-NEXT: sub 5, 6, 5 +; ASM-STRESS-P9-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: uremCase32: +; ASM-P10: # %bb.0: +; 
ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: pli 5, 613566757 +; ASM-P10-NEXT: mulhwu 5, 3, 5 +; ASM-P10-NEXT: sub 6, 3, 5 +; ASM-P10-NEXT: srwi 6, 6, 1 +; ASM-P10-NEXT: add 5, 6, 5 +; ASM-P10-NEXT: srwi 6, 5, 2 +; ASM-P10-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-P10-NEXT: sub 5, 6, 5 +; ASM-P10-NEXT: add 3, 3, 5 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: uremCase32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 35, 613566757 +; ASM-STRESS-P10-NEXT: vspltisw 5, 1 +; ASM-STRESS-P10-NEXT: vmulhuw 3, 2, 3 +; ASM-STRESS-P10-NEXT: vsubuwm 4, 2, 3 +; ASM-STRESS-P10-NEXT: vsrw 4, 4, 5 +; ASM-STRESS-P10-NEXT: vadduwm 3, 4, 3 +; ASM-STRESS-P10-NEXT: vspltisw 4, 2 +; ASM-STRESS-P10-NEXT: vsrw 3, 3, 4 +; ASM-STRESS-P10-NEXT: vspltisw 4, 3 +; ASM-STRESS-P10-NEXT: vslw 4, 3, 4 +; ASM-STRESS-P10-NEXT: vsubuwm 3, 3, 4 +; ASM-STRESS-P10-NEXT: vadduwm 2, 2, 3 +; ASM-STRESS-P10-NEXT: stxsiwx 34, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = urem i32 %extract, 7 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @uremCase32BE(ptr %addr1, ptr %dest) { +; ASM-P9-BE-LABEL: uremCase32BE: +; ASM-P9-BE: # %bb.0: +; ASM-P9-BE-NEXT: lwz 3, 4(3) +; ASM-P9-BE-NEXT: lis 5, 9362 +; ASM-P9-BE-NEXT: ori 5, 5, 18725 +; ASM-P9-BE-NEXT: mulhwu 5, 3, 5 +; ASM-P9-BE-NEXT: sub 6, 3, 5 +; ASM-P9-BE-NEXT: srwi 6, 6, 1 +; ASM-P9-BE-NEXT: add 5, 6, 5 +; ASM-P9-BE-NEXT: srwi 6, 5, 2 +; ASM-P9-BE-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-P9-BE-NEXT: sub 5, 6, 5 +; ASM-P9-BE-NEXT: add 3, 3, 5 +; ASM-P9-BE-NEXT: stw 3, 0(4) +; ASM-P9-BE-NEXT: blr +; +; ASM-STRESS-P9-BE-LABEL: uremCase32BE: +; ASM-STRESS-P9-BE: # %bb.0: +; ASM-STRESS-P9-BE-NEXT: lwz 3, 4(3) +; ASM-STRESS-P9-BE-NEXT: lis 5, 9362 +; ASM-STRESS-P9-BE-NEXT: ori 5, 5, 18725 +; ASM-STRESS-P9-BE-NEXT: mulhwu 5, 3, 5 +; ASM-STRESS-P9-BE-NEXT: sub 6, 3, 5 +; 
ASM-STRESS-P9-BE-NEXT: srwi 6, 6, 1 +; ASM-STRESS-P9-BE-NEXT: add 5, 6, 5 +; ASM-STRESS-P9-BE-NEXT: srwi 6, 5, 2 +; ASM-STRESS-P9-BE-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-STRESS-P9-BE-NEXT: sub 5, 6, 5 +; ASM-STRESS-P9-BE-NEXT: add 3, 3, 5 +; ASM-STRESS-P9-BE-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-BE-NEXT: blr +; +; ASM-P10-BE-LABEL: uremCase32BE: +; ASM-P10-BE: # %bb.0: +; ASM-P10-BE-NEXT: lwz 3, 4(3) +; ASM-P10-BE-NEXT: pli 5, 613566757 +; ASM-P10-BE-NEXT: mulhwu 5, 3, 5 +; ASM-P10-BE-NEXT: sub 6, 3, 5 +; ASM-P10-BE-NEXT: srwi 6, 6, 1 +; ASM-P10-BE-NEXT: add 5, 6, 5 +; ASM-P10-BE-NEXT: srwi 6, 5, 2 +; ASM-P10-BE-NEXT: rlwinm 5, 5, 1, 0, 28 +; ASM-P10-BE-NEXT: sub 5, 6, 5 +; ASM-P10-BE-NEXT: add 3, 3, 5 +; ASM-P10-BE-NEXT: stw 3, 0(4) +; ASM-P10-BE-NEXT: blr +; +; ASM-STRESS-P10-BE-LABEL: uremCase32BE: +; ASM-STRESS-P10-BE: # %bb.0: +; ASM-STRESS-P10-BE-NEXT: lxv 34, 0(3) +; ASM-STRESS-P10-BE-NEXT: xxspltiw 35, 613566757 +; ASM-STRESS-P10-BE-NEXT: vspltisw 5, 1 +; ASM-STRESS-P10-BE-NEXT: vmulhuw 3, 2, 3 +; ASM-STRESS-P10-BE-NEXT: vsubuwm 4, 2, 3 +; ASM-STRESS-P10-BE-NEXT: vsrw 4, 4, 5 +; ASM-STRESS-P10-BE-NEXT: vadduwm 3, 4, 3 +; ASM-STRESS-P10-BE-NEXT: vspltisw 4, 2 +; ASM-STRESS-P10-BE-NEXT: vsrw 3, 3, 4 +; ASM-STRESS-P10-BE-NEXT: vspltisw 4, 3 +; ASM-STRESS-P10-BE-NEXT: vslw 4, 3, 4 +; ASM-STRESS-P10-BE-NEXT: vsubuwm 3, 3, 4 +; ASM-STRESS-P10-BE-NEXT: vadduwm 2, 2, 3 +; ASM-STRESS-P10-BE-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-STRESS-P10-BE-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-BE-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 1 + %out = urem i32 %extract, 7 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @simpleOneInstructionPromotionDouble(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: simpleOneInstructionPromotionDouble: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: vspltisw 2, 1 +; ASM-P8-NEXT: lfd 1, 8(3) +; ASM-P8-NEXT: xvcvsxwdp 0, 34 +; ASM-P8-NEXT: xsadddp 0, 1, 0 +; ASM-P8-NEXT: stfd 0, 0(4) +; 
ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotionDouble: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: addis 5, 2, .LCPI19_0@toc@ha +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addi 5, 5, .LCPI19_0@toc@l +; ASM-STRESS-P8-NEXT: lxvd2x 1, 0, 5 +; ASM-STRESS-P8-NEXT: xxswapd 0, 0 +; ASM-STRESS-P8-NEXT: xxswapd 1, 1 +; ASM-STRESS-P8-NEXT: xvadddp 0, 0, 1 +; ASM-STRESS-P8-NEXT: stfdx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotionDouble: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: vspltisw 2, 1 +; ASM-P9-NEXT: lfd 0, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-P9-NEXT: xsadddp 0, 0, 1 +; ASM-P9-NEXT: stfd 0, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotionDouble: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI19_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI19_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: xvadddp 0, 0, 1 +; ASM-STRESS-P9-NEXT: stfd 0, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotionDouble: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lfd 0, 8(3) +; ASM-P10-NEXT: xxspltidp 1, 1065353216 +; ASM-P10-NEXT: xsadddp 0, 0, 1 +; ASM-P10-NEXT: stfd 0, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotionDouble: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: xxspltidp 1, 1065353216 +; ASM-STRESS-P10-NEXT: xvadddp 0, 0, 1 +; ASM-STRESS-P10-NEXT: stfd 0, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x double>, ptr %addr1, align 16 + %extract = extractelement <2 x double> %in1, i32 1 + %out = fadd double %extract, 1.0 + store double %out, ptr %dest, align 8 + ret void +} + +define void @simpleOneInstructionPromotionFloat(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: simpleOneInstructionPromotionFloat: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: vspltisw 2, 1 +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; 
ASM-P8-NEXT: xvcvsxwdp 1, 34 +; ASM-P8-NEXT: xxswapd 34, 0 +; ASM-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-P8-NEXT: xscvspdpn 0, 0 +; ASM-P8-NEXT: xsaddsp 0, 0, 1 +; ASM-P8-NEXT: stfs 0, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotionFloat: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: addis 5, 2, .LCPI20_0@toc@ha +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addi 5, 5, .LCPI20_0@toc@l +; ASM-STRESS-P8-NEXT: lxvd2x 1, 0, 5 +; ASM-STRESS-P8-NEXT: xxswapd 34, 0 +; ASM-STRESS-P8-NEXT: xxswapd 35, 1 +; ASM-STRESS-P8-NEXT: xvaddsp 0, 34, 35 +; ASM-STRESS-P8-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotionFloat: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: vspltisw 2, 1 +; ASM-P9-NEXT: lfs 0, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-P9-NEXT: xsaddsp 0, 0, 1 +; ASM-P9-NEXT: stfs 0, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotionFloat: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI20_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI20_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: xvaddsp 0, 0, 1 +; ASM-STRESS-P9-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotionFloat: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lfs 0, 8(3) +; ASM-P10-NEXT: xxspltidp 1, 1065353216 +; ASM-P10-NEXT: xsaddsp 0, 0, 1 +; ASM-P10-NEXT: stfs 0, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotionFloat: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: xxspltiw 1, 1065353216 +; ASM-STRESS-P10-NEXT: xvaddsp 0, 0, 1 +; ASM-STRESS-P10-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x float>, ptr %addr1, align 16 + %extract = extractelement <4 x float> %in1, i32 2 + %out = fadd float %extract, 1.0 + store float %out, ptr %dest, align 4 + ret void +} + +define void 
@simpleOneInstructionPromotion64(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: simpleOneInstructionPromotion64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: xxswapd 0, 0 +; ASM-P8-NEXT: mffprd 3, 0 +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotion64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: addis 5, 2, .LCPI21_0@toc@ha +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addi 5, 5, .LCPI21_0@toc@l +; ASM-STRESS-P8-NEXT: lxvd2x 1, 0, 5 +; ASM-STRESS-P8-NEXT: xxswapd 34, 0 +; ASM-STRESS-P8-NEXT: xxswapd 35, 1 +; ASM-STRESS-P8-NEXT: xxlor 0, 34, 35 +; ASM-STRESS-P8-NEXT: stfdx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotion64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: ld 3, 8(3) +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotion64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI21_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI21_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: xxlor 0, 0, 1 +; ASM-STRESS-P9-NEXT: stfd 0, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotion64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: ld 3, 8(3) +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotion64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: xxlxor 1, 1, 1 +; ASM-STRESS-P10-NEXT: xxsplti32dx 1, 1, 1 +; ASM-STRESS-P10-NEXT: xxlor 0, 0, 1 +; ASM-STRESS-P10-NEXT: stfd 0, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 1 + %out = or i64 %extract, 1 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @simpleOneInstructionPromotion32(ptr %addr1, 
ptr %dest) { +; ASM-P8-LABEL: simpleOneInstructionPromotion32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotion32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: vspltisw 2, 1 +; ASM-STRESS-P8-NEXT: xxswapd 35, 0 +; ASM-STRESS-P8-NEXT: xxlor 0, 35, 34 +; ASM-STRESS-P8-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotion32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotion32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: vspltisw 2, 1 +; ASM-STRESS-P9-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P9-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotion32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotion32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: vspltisw 2, 1 +; ASM-STRESS-P10-NEXT: xxlor 0, 0, 34 +; ASM-STRESS-P10-NEXT: stfiwx 0, 0, 4 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = or i32 %extract, 1 + store i32 %out, ptr %dest, align 4 + ret void +} + +define void @simpleOneInstructionPromotionVariableIdx64(ptr %addr1, ptr %dest, i32 %idx) { +; ASM-P8-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: rlwinm 5, 5, 3, 28, 28 +; ASM-P8-NEXT: ldx 3, 3, 5 +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: std 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: 
lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: addis 6, 2, .LCPI23_0@toc@ha +; ASM-STRESS-P8-NEXT: addi 3, 6, .LCPI23_0@toc@l +; ASM-STRESS-P8-NEXT: xxswapd 34, 0 +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P8-NEXT: li 5, 1 +; ASM-STRESS-P8-NEXT: andc 3, 5, 3 +; ASM-STRESS-P8-NEXT: sldi 3, 3, 3 +; ASM-STRESS-P8-NEXT: xxlor 34, 34, 0 +; ASM-STRESS-P8-NEXT: lvsl 3, 0, 3 +; ASM-STRESS-P8-NEXT: vperm 2, 2, 2, 3 +; ASM-STRESS-P8-NEXT: mfvsrd 3, 34 +; ASM-STRESS-P8-NEXT: std 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: rlwinm 5, 5, 3, 28, 28 +; ASM-P9-NEXT: ldx 3, 3, 5 +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: std 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: addis 3, 2, .LCPI23_0@toc@ha +; ASM-STRESS-P9-NEXT: addi 3, 3, .LCPI23_0@toc@l +; ASM-STRESS-P9-NEXT: lxv 1, 0(3) +; ASM-STRESS-P9-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P9-NEXT: li 5, 1 +; ASM-STRESS-P9-NEXT: andc 3, 5, 3 +; ASM-STRESS-P9-NEXT: sldi 3, 3, 3 +; ASM-STRESS-P9-NEXT: xxlor 34, 0, 1 +; ASM-STRESS-P9-NEXT: lvsl 3, 0, 3 +; ASM-STRESS-P9-NEXT: vperm 2, 2, 2, 3 +; ASM-STRESS-P9-NEXT: mfvsrd 3, 34 +; ASM-STRESS-P9-NEXT: std 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: rlwinm 5, 5, 3, 28, 28 +; ASM-P10-NEXT: ldx 3, 3, 5 +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: std 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotionVariableIdx64: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P10-NEXT: li 5, 1 +; ASM-STRESS-P10-NEXT: xxlxor 1, 1, 1 +; ASM-STRESS-P10-NEXT: andc 3, 5, 3 +; ASM-STRESS-P10-NEXT: xxsplti32dx 1, 1, 1 +; ASM-STRESS-P10-NEXT: sldi 3, 3, 3 +; 
ASM-STRESS-P10-NEXT: xxlor 34, 0, 1 +; ASM-STRESS-P10-NEXT: lvsl 3, 0, 3 +; ASM-STRESS-P10-NEXT: vperm 2, 2, 2, 3 +; ASM-STRESS-P10-NEXT: mfvsrd 3, 34 +; ASM-STRESS-P10-NEXT: std 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <2 x i64>, ptr %addr1, align 16 + %extract = extractelement <2 x i64> %in1, i32 %idx + %out = or i64 %extract, 1 + store i64 %out, ptr %dest, align 8 + ret void +} + +define void @simpleOneInstructionPromotionVariableIdx32(ptr %addr1, ptr %dest, i32 %idx) { +; ASM-P8-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: rlwinm 5, 5, 2, 28, 29 +; ASM-P8-NEXT: lwzx 3, 3, 5 +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-STRESS-P8-NEXT: vspltisw 2, 1 +; ASM-STRESS-P8-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P8-NEXT: li 5, 2 +; ASM-STRESS-P8-NEXT: andc 5, 5, 3 +; ASM-STRESS-P8-NEXT: sldi 5, 5, 2 +; ASM-STRESS-P8-NEXT: xxswapd 35, 0 +; ASM-STRESS-P8-NEXT: xxlor 34, 35, 34 +; ASM-STRESS-P8-NEXT: lvsl 3, 0, 5 +; ASM-STRESS-P8-NEXT: li 5, 1 +; ASM-STRESS-P8-NEXT: and 3, 5, 3 +; ASM-STRESS-P8-NEXT: vperm 2, 2, 2, 3 +; ASM-STRESS-P8-NEXT: sldi 3, 3, 5 +; ASM-STRESS-P8-NEXT: mfvsrd 6, 34 +; ASM-STRESS-P8-NEXT: srd 3, 6, 3 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: rlwinm 5, 5, 2, 28, 29 +; ASM-P9-NEXT: lwzx 3, 3, 5 +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lxv 0, 0(3) +; ASM-STRESS-P9-NEXT: vspltisw 2, 1 +; ASM-STRESS-P9-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P9-NEXT: rlwinm 3, 3, 2, 28, 29 +; ASM-STRESS-P9-NEXT: xxlor 34, 0, 34 +; ASM-STRESS-P9-NEXT: vextuwrx 3, 3, 2 +; ASM-STRESS-P9-NEXT: 
stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: rlwinm 5, 5, 2, 28, 29 +; ASM-P10-NEXT: lwzx 3, 3, 5 +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: simpleOneInstructionPromotionVariableIdx32: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lxv 0, 0(3) +; ASM-STRESS-P10-NEXT: vspltisw 2, 1 +; ASM-STRESS-P10-NEXT: clrldi 3, 5, 32 +; ASM-STRESS-P10-NEXT: rlwinm 3, 3, 2, 28, 29 +; ASM-STRESS-P10-NEXT: xxlor 34, 0, 34 +; ASM-STRESS-P10-NEXT: vextuwrx 3, 3, 2 +; ASM-STRESS-P10-NEXT: stw 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 %idx + %out = or i32 %extract, 1 + store i32 %out, ptr %dest, align 4 + ret void +} + +; frem nnan with the extracted lane (index 2) as dividend and the constant 7.0 +; as divisor. Every check block below expects a call to fmodf. +define void @undefConstantFRemCaseWithFastMath(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: mflr 0 +; ASM-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-P8-NEXT: .cfi_offset lr, 16 +; ASM-P8-NEXT: .cfi_offset r30, -16 +; ASM-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P8-NEXT: stdu 1, -48(1) +; ASM-P8-NEXT: std 0, 64(1) +; ASM-P8-NEXT: mr 30, 4 +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: xxswapd 34, 0 +; ASM-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: xscvspdpn 1, 0 +; ASM-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P8-NEXT: bl fmodf +; ASM-P8-NEXT: nop +; ASM-P8-NEXT: stfs 1, 0(30) +; ASM-P8-NEXT: addi 1, 1, 48 +; ASM-P8-NEXT: ld 0, 16(1) +; ASM-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P8-NEXT: mtlr 0 +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: mflr 0 +; ASM-STRESS-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P8-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P8-NEXT: .cfi_offset r30,
-16 +; ASM-STRESS-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P8-NEXT: stdu 1, -48(1) +; ASM-STRESS-P8-NEXT: std 0, 64(1) +; ASM-STRESS-P8-NEXT: vspltisw 2, 7 +; ASM-STRESS-P8-NEXT: mr 30, 4 +; ASM-STRESS-P8-NEXT: lfs 1, 8(3) +; ASM-STRESS-P8-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P8-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-STRESS-P8-NEXT: bl fmodf +; ASM-STRESS-P8-NEXT: nop +; ASM-STRESS-P8-NEXT: stfs 1, 0(30) +; ASM-STRESS-P8-NEXT: addi 1, 1, 48 +; ASM-STRESS-P8-NEXT: ld 0, 16(1) +; ASM-STRESS-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P8-NEXT: mtlr 0 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: mflr 0 +; ASM-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-P9-NEXT: .cfi_offset lr, 16 +; ASM-P9-NEXT: .cfi_offset r30, -16 +; ASM-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P9-NEXT: stdu 1, -48(1) +; ASM-P9-NEXT: std 0, 64(1) +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: mr 30, 4 +; ASM-P9-NEXT: lfs 1, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-P9-NEXT: bl fmodf +; ASM-P9-NEXT: nop +; ASM-P9-NEXT: stfs 1, 0(30) +; ASM-P9-NEXT: addi 1, 1, 48 +; ASM-P9-NEXT: ld 0, 16(1) +; ASM-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P9-NEXT: mtlr 0 +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: mflr 0 +; ASM-STRESS-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P9-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P9-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P9-NEXT: stdu 1, -48(1) +; ASM-STRESS-P9-NEXT: std 0, 64(1) +; ASM-STRESS-P9-NEXT: vspltisw 2, 7 +; ASM-STRESS-P9-NEXT: mr 30, 4 +; ASM-STRESS-P9-NEXT: lfs 1, 8(3) +; ASM-STRESS-P9-NEXT: xvcvsxwdp 2, 34 +; ASM-STRESS-P9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; ASM-STRESS-P9-NEXT: bl fmodf +; ASM-STRESS-P9-NEXT:
nop +; ASM-STRESS-P9-NEXT: stfs 1, 0(30) +; ASM-STRESS-P9-NEXT: addi 1, 1, 48 +; ASM-STRESS-P9-NEXT: ld 0, 16(1) +; ASM-STRESS-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P9-NEXT: mtlr 0 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: mflr 0 +; ASM-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-P10-NEXT: .cfi_offset lr, 16 +; ASM-P10-NEXT: .cfi_offset r30, -16 +; ASM-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P10-NEXT: std 0, 16(1) +; ASM-P10-NEXT: stdu 1, -48(1) +; ASM-P10-NEXT: lfs 1, 8(3) +; ASM-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-P10-NEXT: mr 30, 4 +; ASM-P10-NEXT: bl fmodf@notoc +; ASM-P10-NEXT: stfs 1, 0(30) +; ASM-P10-NEXT: addi 1, 1, 48 +; ASM-P10-NEXT: ld 0, 16(1) +; ASM-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P10-NEXT: mtlr 0 +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: undefConstantFRemCaseWithFastMath: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: mflr 0 +; ASM-STRESS-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P10-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P10-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P10-NEXT: std 0, 16(1) +; ASM-STRESS-P10-NEXT: stdu 1, -48(1) +; ASM-STRESS-P10-NEXT: lfs 1, 8(3) +; ASM-STRESS-P10-NEXT: xxspltidp 2, 1088421888 +; ASM-STRESS-P10-NEXT: mr 30, 4 +; ASM-STRESS-P10-NEXT: bl fmodf@notoc +; ASM-STRESS-P10-NEXT: stfs 1, 0(30) +; ASM-STRESS-P10-NEXT: addi 1, 1, 48 +; ASM-STRESS-P10-NEXT: ld 0, 16(1) +; ASM-STRESS-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P10-NEXT: mtlr 0 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x float>, ptr %addr1, align 16 + %extract = extractelement <4 x float> %in1, i32 2 + %out = frem nnan float %extract, 7.0 + store float %out, ptr %dest, align 4 + ret void +} + +; frem nnan with the constant 7.0 as dividend and the extracted lane (index 2) +; as divisor. Every check block below expects a call to fmodf. +define void @undefVectorFRemCaseWithFastMath(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-P8: # %bb.0: +;
ASM-P8-NEXT: mflr 0 +; ASM-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-P8-NEXT: .cfi_offset lr, 16 +; ASM-P8-NEXT: .cfi_offset r30, -16 +; ASM-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P8-NEXT: stdu 1, -48(1) +; ASM-P8-NEXT: std 0, 64(1) +; ASM-P8-NEXT: mr 30, 4 +; ASM-P8-NEXT: lxvd2x 0, 0, 3 +; ASM-P8-NEXT: xxswapd 34, 0 +; ASM-P8-NEXT: xxsldwi 0, 34, 34, 1 +; ASM-P8-NEXT: vspltisw 2, 7 +; ASM-P8-NEXT: xscvspdpn 2, 0 +; ASM-P8-NEXT: xvcvsxwdp 1, 34 +; ASM-P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; ASM-P8-NEXT: bl fmodf +; ASM-P8-NEXT: nop +; ASM-P8-NEXT: stfs 1, 0(30) +; ASM-P8-NEXT: addi 1, 1, 48 +; ASM-P8-NEXT: ld 0, 16(1) +; ASM-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P8-NEXT: mtlr 0 +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: mflr 0 +; ASM-STRESS-P8-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P8-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P8-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P8-NEXT: stdu 1, -48(1) +; ASM-STRESS-P8-NEXT: std 0, 64(1) +; ASM-STRESS-P8-NEXT: vspltisw 2, 7 +; ASM-STRESS-P8-NEXT: mr 30, 4 +; ASM-STRESS-P8-NEXT: lfs 2, 8(3) +; ASM-STRESS-P8-NEXT: xvcvsxwdp 1, 34 +; ASM-STRESS-P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; ASM-STRESS-P8-NEXT: bl fmodf +; ASM-STRESS-P8-NEXT: nop +; ASM-STRESS-P8-NEXT: stfs 1, 0(30) +; ASM-STRESS-P8-NEXT: addi 1, 1, 48 +; ASM-STRESS-P8-NEXT: ld 0, 16(1) +; ASM-STRESS-P8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P8-NEXT: mtlr 0 +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: mflr 0 +; ASM-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-P9-NEXT: .cfi_offset lr, 16 +; ASM-P9-NEXT: .cfi_offset r30, -16 +; ASM-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P9-NEXT: stdu 1, -48(1) +; ASM-P9-NEXT: std 0, 64(1) +; ASM-P9-NEXT: vspltisw 2, 7 +; ASM-P9-NEXT: mr 30, 4 +;
ASM-P9-NEXT: lfs 2, 8(3) +; ASM-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; ASM-P9-NEXT: bl fmodf +; ASM-P9-NEXT: nop +; ASM-P9-NEXT: stfs 1, 0(30) +; ASM-P9-NEXT: addi 1, 1, 48 +; ASM-P9-NEXT: ld 0, 16(1) +; ASM-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P9-NEXT: mtlr 0 +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: mflr 0 +; ASM-STRESS-P9-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P9-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P9-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P9-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P9-NEXT: stdu 1, -48(1) +; ASM-STRESS-P9-NEXT: std 0, 64(1) +; ASM-STRESS-P9-NEXT: vspltisw 2, 7 +; ASM-STRESS-P9-NEXT: mr 30, 4 +; ASM-STRESS-P9-NEXT: lfs 2, 8(3) +; ASM-STRESS-P9-NEXT: xvcvsxwdp 1, 34 +; ASM-STRESS-P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; ASM-STRESS-P9-NEXT: bl fmodf +; ASM-STRESS-P9-NEXT: nop +; ASM-STRESS-P9-NEXT: stfs 1, 0(30) +; ASM-STRESS-P9-NEXT: addi 1, 1, 48 +; ASM-STRESS-P9-NEXT: ld 0, 16(1) +; ASM-STRESS-P9-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P9-NEXT: mtlr 0 +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: mflr 0 +; ASM-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-P10-NEXT: .cfi_offset lr, 16 +; ASM-P10-NEXT: .cfi_offset r30, -16 +; ASM-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-P10-NEXT: std 0, 16(1) +; ASM-P10-NEXT: stdu 1, -48(1) +; ASM-P10-NEXT: lfs 2, 8(3) +; ASM-P10-NEXT: xxspltidp 1, 1088421888 +; ASM-P10-NEXT: mr 30, 4 +; ASM-P10-NEXT: bl fmodf@notoc +; ASM-P10-NEXT: stfs 1, 0(30) +; ASM-P10-NEXT: addi 1, 1, 48 +; ASM-P10-NEXT: ld 0, 16(1) +; ASM-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-P10-NEXT: mtlr 0 +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: undefVectorFRemCaseWithFastMath: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: mflr 0 +;
ASM-STRESS-P10-NEXT: .cfi_def_cfa_offset 48 +; ASM-STRESS-P10-NEXT: .cfi_offset lr, 16 +; ASM-STRESS-P10-NEXT: .cfi_offset r30, -16 +; ASM-STRESS-P10-NEXT: std 30, -16(1) # 8-byte Folded Spill +; ASM-STRESS-P10-NEXT: std 0, 16(1) +; ASM-STRESS-P10-NEXT: stdu 1, -48(1) +; ASM-STRESS-P10-NEXT: lfs 2, 8(3) +; ASM-STRESS-P10-NEXT: xxspltidp 1, 1088421888 +; ASM-STRESS-P10-NEXT: mr 30, 4 +; ASM-STRESS-P10-NEXT: bl fmodf@notoc +; ASM-STRESS-P10-NEXT: stfs 1, 0(30) +; ASM-STRESS-P10-NEXT: addi 1, 1, 48 +; ASM-STRESS-P10-NEXT: ld 0, 16(1) +; ASM-STRESS-P10-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; ASM-STRESS-P10-NEXT: mtlr 0 +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x float>, ptr %addr1, align 16 + %extract = extractelement <4 x float> %in1, i32 2 + %out = frem nnan float 7.0, %extract + store float %out, ptr %dest, align 4 + ret void +} + +; The extractelement is in bb1 while its only user (the or feeding the store) +; is in bb2. The check blocks with and without -stress-cgp-store-extract +; expect the same scalar lwz/ori/stw sequence. +define void @unsupportedChainInDifferentBBs(ptr %addr1, ptr %dest, i1 %bool) { +; ASM-P8-LABEL: unsupportedChainInDifferentBBs: +; ASM-P8: # %bb.0: # %bb1 +; ASM-P8-NEXT: andi. 5, 5, 1 +; ASM-P8-NEXT: bclr 4, 1, 0 +; ASM-P8-NEXT: # %bb.1: # %bb2 +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: unsupportedChainInDifferentBBs: +; ASM-STRESS-P8: # %bb.0: # %bb1 +; ASM-STRESS-P8-NEXT: andi. 5, 5, 1 +; ASM-STRESS-P8-NEXT: bclr 4, 1, 0 +; ASM-STRESS-P8-NEXT: # %bb.1: # %bb2 +; ASM-STRESS-P8-NEXT: lwz 3, 8(3) +; ASM-STRESS-P8-NEXT: ori 3, 3, 1 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: unsupportedChainInDifferentBBs: +; ASM-P9: # %bb.0: # %bb1 +; ASM-P9-NEXT: andi. 5, 5, 1 +; ASM-P9-NEXT: bclr 4, 1, 0 +; ASM-P9-NEXT: # %bb.1: # %bb2 +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: unsupportedChainInDifferentBBs: +; ASM-STRESS-P9: # %bb.0: # %bb1 +; ASM-STRESS-P9-NEXT: andi.
5, 5, 1 +; ASM-STRESS-P9-NEXT: bclr 4, 1, 0 +; ASM-STRESS-P9-NEXT: # %bb.1: # %bb2 +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 3, 3, 1 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: unsupportedChainInDifferentBBs: +; ASM-P10: # %bb.0: # %bb1 +; ASM-P10-NEXT: andi. 5, 5, 1 +; ASM-P10-NEXT: bclr 4, 1, 0 +; ASM-P10-NEXT: # %bb.1: # %bb2 +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: unsupportedChainInDifferentBBs: +; ASM-STRESS-P10: # %bb.0: # %bb1 +; ASM-STRESS-P10-NEXT: andi. 5, 5, 1 +; ASM-STRESS-P10-NEXT: bclr 4, 1, 0 +; ASM-STRESS-P10-NEXT: # %bb.1: # %bb2 +; ASM-STRESS-P10-NEXT: lwz 3, 8(3) +; ASM-STRESS-P10-NEXT: ori 3, 3, 1 +; ASM-STRESS-P10-NEXT: stw 3, 0(4) +; ASM-STRESS-P10-NEXT: blr +bb1: + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + br i1 %bool, label %bb2, label %end +bb2: + %out = or i32 %extract, 1 + store i32 %out, ptr %dest, align 4 + br label %end +end: + ret void +} + +; The extracted lane (index 2) feeds an icmp eq whose i1 result is stored. +; For each CPU the check blocks with and without -stress-cgp-store-extract +; are identical. +define void @unsupportedInstructionForPromotion(ptr %addr1, i32 %in2, ptr %dest) { +; ASM-P8-LABEL: unsupportedInstructionForPromotion: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: xor 3, 3, 4 +; ASM-P8-NEXT: cntlzw 3, 3 +; ASM-P8-NEXT: srwi 3, 3, 5 +; ASM-P8-NEXT: stb 3, 0(5) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: unsupportedInstructionForPromotion: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lwz 3, 8(3) +; ASM-STRESS-P8-NEXT: xor 3, 3, 4 +; ASM-STRESS-P8-NEXT: cntlzw 3, 3 +; ASM-STRESS-P8-NEXT: srwi 3, 3, 5 +; ASM-STRESS-P8-NEXT: stb 3, 0(5) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: unsupportedInstructionForPromotion: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: xor 3, 3, 4 +; ASM-P9-NEXT: cntlzw 3, 3 +; ASM-P9-NEXT: srwi 3, 3, 5 +; ASM-P9-NEXT: stb 3, 0(5) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL:
unsupportedInstructionForPromotion: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: xor 3, 3, 4 +; ASM-STRESS-P9-NEXT: cntlzw 3, 3 +; ASM-STRESS-P9-NEXT: srwi 3, 3, 5 +; ASM-STRESS-P9-NEXT: stb 3, 0(5) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: unsupportedInstructionForPromotion: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: cmpw 3, 4 +; ASM-P10-NEXT: setbc 3, 2 +; ASM-P10-NEXT: stb 3, 0(5) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: unsupportedInstructionForPromotion: +; ASM-STRESS-P10: # %bb.0: +; ASM-STRESS-P10-NEXT: lwz 3, 8(3) +; ASM-STRESS-P10-NEXT: cmpw 3, 4 +; ASM-STRESS-P10-NEXT: setbc 3, 2 +; ASM-STRESS-P10-NEXT: stb 3, 0(5) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = icmp eq i32 %extract, %in2 + store i1 %out, ptr %dest, align 4 + ret void +} + +; %out has two uses: the store and the return value. Every check block below +; expects the scalar lwz/ori/stw sequence. +define i32 @unsupportedMultiUses(ptr %addr1, ptr %dest) { +; ASM-P8-LABEL: unsupportedMultiUses: +; ASM-P8: # %bb.0: +; ASM-P8-NEXT: lwz 3, 8(3) +; ASM-P8-NEXT: ori 3, 3, 1 +; ASM-P8-NEXT: stw 3, 0(4) +; ASM-P8-NEXT: blr +; +; ASM-STRESS-P8-LABEL: unsupportedMultiUses: +; ASM-STRESS-P8: # %bb.0: +; ASM-STRESS-P8-NEXT: lwz 3, 8(3) +; ASM-STRESS-P8-NEXT: ori 3, 3, 1 +; ASM-STRESS-P8-NEXT: stw 3, 0(4) +; ASM-STRESS-P8-NEXT: blr +; +; ASM-P9-LABEL: unsupportedMultiUses: +; ASM-P9: # %bb.0: +; ASM-P9-NEXT: lwz 3, 8(3) +; ASM-P9-NEXT: ori 3, 3, 1 +; ASM-P9-NEXT: stw 3, 0(4) +; ASM-P9-NEXT: blr +; +; ASM-STRESS-P9-LABEL: unsupportedMultiUses: +; ASM-STRESS-P9: # %bb.0: +; ASM-STRESS-P9-NEXT: lwz 3, 8(3) +; ASM-STRESS-P9-NEXT: ori 3, 3, 1 +; ASM-STRESS-P9-NEXT: stw 3, 0(4) +; ASM-STRESS-P9-NEXT: blr +; +; ASM-P10-LABEL: unsupportedMultiUses: +; ASM-P10: # %bb.0: +; ASM-P10-NEXT: lwz 3, 8(3) +; ASM-P10-NEXT: ori 3, 3, 1 +; ASM-P10-NEXT: stw 3, 0(4) +; ASM-P10-NEXT: blr +; +; ASM-STRESS-P10-LABEL: unsupportedMultiUses: +; ASM-STRESS-P10: # %bb.0: +;
ASM-STRESS-P10-NEXT: lwz 3, 8(3) +; ASM-STRESS-P10-NEXT: ori 3, 3, 1 +; ASM-STRESS-P10-NEXT: stw 3, 0(4) +; ASM-STRESS-P10-NEXT: blr + %in1 = load <4 x i32>, ptr %addr1, align 16 + %extract = extractelement <4 x i32> %in1, i32 2 + %out = or i32 %extract, 1 + store i32 %out, ptr %dest, align 4 + ret i32 %out +}