diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10223,14 +10223,16 @@ if (isLittleEndian) std::swap(V1, V2); - if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) { + if (Subtarget.hasVSX() && Subtarget.hasP9Vector() && + (V1->hasOneUse() || V2->hasOneUse())) { LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using " "XXPERM instead\n"); Opcode = PPCISD::XXPERM; - // if V2 is dead, then we swap V1 and V2 so we can - // use V2 as the destination instead. - if (!V1->hasOneUse() && V2->hasOneUse()) { + // The second input to XXPERM is also an output so if the second input has + // multiple uses then copying is necessary, as a result we want the + // single-use operand to be used as the second input to prevent copying. + if (!V2->hasOneUse() && V1->hasOneUse()) { std::swap(V1, V2); NeedSwap = !NeedSwap; } diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -137,21 +137,20 @@ ; P9LE-LABEL: test32: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: add 5, 3, 4 -; P9LE-NEXT: lfiwzx 0, 3, 4 +; P9LE-NEXT: lxsiwzx 2, 3, 4 ; P9LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha -; P9LE-NEXT: xxlxor 3, 3, 3 +; P9LE-NEXT: xxlxor 0, 0, 0 ; P9LE-NEXT: vspltisw 4, 8 ; P9LE-NEXT: addi 3, 3, .LCPI1_0@toc@l ; P9LE-NEXT: lxv 1, 0(3) ; P9LE-NEXT: li 3, 4 -; P9LE-NEXT: xxlxor 2, 2, 2 ; P9LE-NEXT: vadduwm 4, 4, 4 -; P9LE-NEXT: xxperm 3, 0, 1 -; P9LE-NEXT: lfiwzx 0, 5, 3 +; P9LE-NEXT: lxsiwzx 3, 5, 3 ; P9LE-NEXT: xxperm 2, 0, 1 -; P9LE-NEXT: vnegw 2, 2 -; P9LE-NEXT: vslw 2, 2, 4 -; P9LE-NEXT: vsubuwm 2, 2, 3 +; P9LE-NEXT: xxperm 3, 0, 1 +; P9LE-NEXT: vnegw 3, 3 +; P9LE-NEXT: vslw 3, 3, 4 +; P9LE-NEXT: vsubuwm 2, 3, 2 ; P9LE-NEXT: xxswapd 0, 2 ; P9LE-NEXT: stxv 0, 0(3) ; 
P9LE-NEXT: blr @@ -159,21 +158,20 @@ ; P9BE-LABEL: test32: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add 5, 3, 4 -; P9BE-NEXT: lfiwzx 0, 3, 4 +; P9BE-NEXT: lxsiwzx 2, 3, 4 ; P9BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha -; P9BE-NEXT: xxlxor 3, 3, 3 +; P9BE-NEXT: xxlxor 0, 0, 0 ; P9BE-NEXT: vspltisw 4, 8 ; P9BE-NEXT: addi 3, 3, .LCPI1_0@toc@l ; P9BE-NEXT: lxv 1, 0(3) ; P9BE-NEXT: li 3, 4 -; P9BE-NEXT: xxlxor 2, 2, 2 ; P9BE-NEXT: vadduwm 4, 4, 4 -; P9BE-NEXT: xxperm 3, 0, 1 -; P9BE-NEXT: lfiwzx 0, 5, 3 +; P9BE-NEXT: lxsiwzx 3, 5, 3 ; P9BE-NEXT: xxperm 2, 0, 1 -; P9BE-NEXT: vnegw 2, 2 -; P9BE-NEXT: vslw 2, 2, 4 -; P9BE-NEXT: vsubuwm 2, 2, 3 +; P9BE-NEXT: xxperm 3, 0, 1 +; P9BE-NEXT: vnegw 3, 3 +; P9BE-NEXT: vslw 3, 3, 4 +; P9BE-NEXT: vsubuwm 2, 3, 2 ; P9BE-NEXT: xxswapd 0, 2 ; P9BE-NEXT: stxv 0, 0(3) ; P9BE-NEXT: blr @@ -181,20 +179,19 @@ ; P9BE-AIX-LABEL: test32: ; P9BE-AIX: # %bb.0: # %entry ; P9BE-AIX-NEXT: add 5, 3, 4 -; P9BE-AIX-NEXT: lfiwzx 0, 3, 4 +; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4 ; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0 -; P9BE-AIX-NEXT: xxlxor 3, 3, 3 -; P9BE-AIX-NEXT: xxlxor 2, 2, 2 +; P9BE-AIX-NEXT: xxlxor 0, 0, 0 ; P9BE-AIX-NEXT: vspltisw 4, 8 -; P9BE-AIX-NEXT: vadduwm 4, 4, 4 ; P9BE-AIX-NEXT: lxv 1, 0(3) ; P9BE-AIX-NEXT: li 3, 4 -; P9BE-AIX-NEXT: xxperm 3, 0, 1 -; P9BE-AIX-NEXT: lfiwzx 0, 5, 3 +; P9BE-AIX-NEXT: vadduwm 4, 4, 4 +; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3 ; P9BE-AIX-NEXT: xxperm 2, 0, 1 -; P9BE-AIX-NEXT: vnegw 2, 2 -; P9BE-AIX-NEXT: vslw 2, 2, 4 -; P9BE-AIX-NEXT: vsubuwm 2, 2, 3 +; P9BE-AIX-NEXT: xxperm 3, 0, 1 +; P9BE-AIX-NEXT: vnegw 3, 3 +; P9BE-AIX-NEXT: vslw 3, 3, 4 +; P9BE-AIX-NEXT: vsubuwm 2, 3, 2 ; P9BE-AIX-NEXT: xxswapd 0, 2 ; P9BE-AIX-NEXT: stxv 0, 0(3) ; P9BE-AIX-NEXT: blr @@ -202,20 +199,19 @@ ; P9BE-AIX32-LABEL: test32: ; P9BE-AIX32: # %bb.0: # %entry ; P9BE-AIX32-NEXT: add 5, 3, 4 -; P9BE-AIX32-NEXT: lfiwzx 0, 3, 4 +; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4 ; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0 -; P9BE-AIX32-NEXT: xxlxor 3, 3, 3 -; 
P9BE-AIX32-NEXT: xxlxor 2, 2, 2 +; P9BE-AIX32-NEXT: xxlxor 0, 0, 0 ; P9BE-AIX32-NEXT: vspltisw 4, 8 -; P9BE-AIX32-NEXT: vadduwm 4, 4, 4 ; P9BE-AIX32-NEXT: lxv 1, 0(3) ; P9BE-AIX32-NEXT: li 3, 4 -; P9BE-AIX32-NEXT: xxperm 3, 0, 1 -; P9BE-AIX32-NEXT: lfiwzx 0, 5, 3 +; P9BE-AIX32-NEXT: vadduwm 4, 4, 4 +; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3 ; P9BE-AIX32-NEXT: xxperm 2, 0, 1 -; P9BE-AIX32-NEXT: vnegw 2, 2 -; P9BE-AIX32-NEXT: vslw 2, 2, 4 -; P9BE-AIX32-NEXT: vsubuwm 2, 2, 3 +; P9BE-AIX32-NEXT: xxperm 3, 0, 1 +; P9BE-AIX32-NEXT: vnegw 3, 3 +; P9BE-AIX32-NEXT: vslw 3, 3, 4 +; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2 ; P9BE-AIX32-NEXT: xxswapd 0, 2 ; P9BE-AIX32-NEXT: stxv 0, 0(3) ; P9BE-AIX32-NEXT: blr @@ -272,25 +268,24 @@ ; P9BE-NEXT: sldi 4, 4, 1 ; P9BE-NEXT: li 7, 16 ; P9BE-NEXT: add 6, 3, 4 -; P9BE-NEXT: lxsihzx 0, 6, 7 +; P9BE-NEXT: lxsihzx 1, 3, 4 +; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha +; P9BE-NEXT: lxsihzx 2, 6, 7 ; P9BE-NEXT: addis 6, 2, .LCPI2_0@toc@ha +; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l ; P9BE-NEXT: addi 6, 6, .LCPI2_0@toc@l -; P9BE-NEXT: lxv 1, 0(6) +; P9BE-NEXT: lxv 0, 0(6) ; P9BE-NEXT: li 6, 0 -; P9BE-NEXT: mtvsrwz 2, 6 -; P9BE-NEXT: vmr 3, 2 -; P9BE-NEXT: vsplth 4, 2, 3 -; P9BE-NEXT: xxperm 3, 0, 1 -; P9BE-NEXT: lxsihzx 0, 3, 4 -; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l -; P9BE-NEXT: xxperm 2, 0, 1 +; P9BE-NEXT: mtvsrwz 3, 6 +; P9BE-NEXT: xxperm 2, 3, 0 +; P9BE-NEXT: xxperm 1, 3, 0 +; P9BE-NEXT: vsplth 3, 3, 3 ; P9BE-NEXT: lxv 0, 0(3) ; P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxmrghw 2, 4, 2 -; P9BE-NEXT: xxperm 3, 2, 0 -; P9BE-NEXT: xxspltw 2, 3, 1 -; P9BE-NEXT: vadduwm 2, 3, 2 +; P9BE-NEXT: xxmrghw 3, 3, 1 +; P9BE-NEXT: xxperm 2, 3, 0 +; P9BE-NEXT: xxspltw 3, 2, 1 +; P9BE-NEXT: vadduwm 2, 2, 3 ; P9BE-NEXT: vextuwlx 3, 3, 2 ; P9BE-NEXT: cmpw 3, 5 ; P9BE-NEXT: bgelr+ 0 @@ -301,23 +296,22 @@ ; P9BE-AIX-NEXT: sldi 4, 4, 1 ; P9BE-AIX-NEXT: li 7, 16 ; P9BE-AIX-NEXT: add 6, 3, 4 -; P9BE-AIX-NEXT: lxsihzx 0, 6, 7 -; P9BE-AIX-NEXT: ld 6, 
L..C3(2) # %const.0 -; P9BE-AIX-NEXT: lxv 1, 0(6) +; P9BE-AIX-NEXT: lxsihzx 1, 3, 4 +; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1 +; P9BE-AIX-NEXT: lxsihzx 2, 6, 7 +; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0 +; P9BE-AIX-NEXT: lxv 0, 0(6) ; P9BE-AIX-NEXT: li 6, 0 -; P9BE-AIX-NEXT: mtvsrwz 2, 6 -; P9BE-AIX-NEXT: vmr 3, 2 -; P9BE-AIX-NEXT: vsplth 4, 2, 3 -; P9BE-AIX-NEXT: xxperm 3, 0, 1 -; P9BE-AIX-NEXT: lxsihzx 0, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1 -; P9BE-AIX-NEXT: xxperm 2, 0, 1 +; P9BE-AIX-NEXT: mtvsrwz 3, 6 +; P9BE-AIX-NEXT: xxperm 2, 3, 0 +; P9BE-AIX-NEXT: xxperm 1, 3, 0 +; P9BE-AIX-NEXT: vsplth 3, 3, 3 ; P9BE-AIX-NEXT: lxv 0, 0(3) ; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxmrghw 2, 4, 2 -; P9BE-AIX-NEXT: xxperm 3, 2, 0 -; P9BE-AIX-NEXT: xxspltw 2, 3, 1 -; P9BE-AIX-NEXT: vadduwm 2, 3, 2 +; P9BE-AIX-NEXT: xxmrghw 3, 3, 1 +; P9BE-AIX-NEXT: xxperm 2, 3, 0 +; P9BE-AIX-NEXT: xxspltw 3, 2, 1 +; P9BE-AIX-NEXT: vadduwm 2, 2, 3 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 ; P9BE-AIX-NEXT: cmpw 3, 5 ; P9BE-AIX-NEXT: bgelr+ 0 @@ -413,29 +407,26 @@ ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add 6, 3, 4 ; P9BE-NEXT: li 7, 8 +; P9BE-NEXT: lxsibzx 3, 3, 4 +; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha ; P9BE-NEXT: lxsibzx 0, 6, 7 ; P9BE-NEXT: addis 6, 2, .LCPI3_0@toc@ha +; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l ; P9BE-NEXT: addi 6, 6, .LCPI3_0@toc@l ; P9BE-NEXT: lxv 1, 0(6) ; P9BE-NEXT: li 6, 0 ; P9BE-NEXT: mtvsrwz 2, 6 -; P9BE-NEXT: vspltb 3, 2, 7 ; P9BE-NEXT: xxperm 0, 2, 1 -; P9BE-NEXT: lxsibzx 1, 3, 4 -; P9BE-NEXT: addis 3, 2, .LCPI3_1@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI3_1@toc@l -; P9BE-NEXT: lxv 2, 0(3) -; P9BE-NEXT: addis 3, 2, .LCPI3_2@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI3_2@toc@l -; P9BE-NEXT: xxperm 2, 1, 2 -; P9BE-NEXT: xxspltw 1, 3, 0 -; P9BE-NEXT: vmrghh 2, 2, 3 -; P9BE-NEXT: xxmrghw 2, 2, 0 +; P9BE-NEXT: xxperm 3, 2, 1 +; P9BE-NEXT: vspltb 2, 2, 7 +; P9BE-NEXT: vmrghh 3, 3, 2 +; P9BE-NEXT: xxspltw 1, 2, 0 +; P9BE-NEXT: xxmrghw 3, 3, 0 ; P9BE-NEXT: lxv 0, 0(3) ; 
P9BE-NEXT: li 3, 0 -; P9BE-NEXT: xxperm 2, 1, 0 -; P9BE-NEXT: xxspltw 3, 2, 1 -; P9BE-NEXT: vadduwm 2, 2, 3 +; P9BE-NEXT: xxperm 3, 1, 0 +; P9BE-NEXT: xxspltw 2, 3, 1 +; P9BE-NEXT: vadduwm 2, 3, 2 ; P9BE-NEXT: vextuwlx 3, 3, 2 ; P9BE-NEXT: cmpw 3, 5 ; P9BE-NEXT: bgelr+ 0 @@ -445,26 +436,24 @@ ; P9BE-AIX: # %bb.0: # %entry ; P9BE-AIX-NEXT: add 6, 3, 4 ; P9BE-AIX-NEXT: li 7, 8 +; P9BE-AIX-NEXT: lxsibzx 3, 3, 4 +; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1 ; P9BE-AIX-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0 +; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0 ; P9BE-AIX-NEXT: lxv 1, 0(6) ; P9BE-AIX-NEXT: li 6, 0 ; P9BE-AIX-NEXT: mtvsrwz 2, 6 -; P9BE-AIX-NEXT: vspltb 3, 2, 7 ; P9BE-AIX-NEXT: xxperm 0, 2, 1 -; P9BE-AIX-NEXT: lxsibzx 1, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C6(2) # %const.1 -; P9BE-AIX-NEXT: lxv 2, 0(3) -; P9BE-AIX-NEXT: ld 3, L..C7(2) # %const.2 -; P9BE-AIX-NEXT: xxperm 2, 1, 2 -; P9BE-AIX-NEXT: xxspltw 1, 3, 0 -; P9BE-AIX-NEXT: vmrghh 2, 2, 3 -; P9BE-AIX-NEXT: xxmrghw 2, 2, 0 +; P9BE-AIX-NEXT: xxperm 3, 2, 1 +; P9BE-AIX-NEXT: vspltb 2, 2, 7 +; P9BE-AIX-NEXT: vmrghh 3, 3, 2 +; P9BE-AIX-NEXT: xxspltw 1, 2, 0 +; P9BE-AIX-NEXT: xxmrghw 3, 3, 0 ; P9BE-AIX-NEXT: lxv 0, 0(3) ; P9BE-AIX-NEXT: li 3, 0 -; P9BE-AIX-NEXT: xxperm 2, 1, 0 -; P9BE-AIX-NEXT: xxspltw 3, 2, 1 -; P9BE-AIX-NEXT: vadduwm 2, 2, 3 +; P9BE-AIX-NEXT: xxperm 3, 1, 0 +; P9BE-AIX-NEXT: xxspltw 2, 3, 1 +; P9BE-AIX-NEXT: vadduwm 2, 3, 2 ; P9BE-AIX-NEXT: vextuwlx 3, 3, 2 ; P9BE-AIX-NEXT: cmpw 3, 5 ; P9BE-AIX-NEXT: bgelr+ 0 @@ -474,25 +463,23 @@ ; P9BE-AIX32: # %bb.0: # %entry ; P9BE-AIX32-NEXT: add 6, 3, 4 ; P9BE-AIX32-NEXT: li 7, 8 +; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4 +; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1 ; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0 +; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0 ; P9BE-AIX32-NEXT: lxv 1, 0(6) ; P9BE-AIX32-NEXT: li 6, 0 ; P9BE-AIX32-NEXT: mtvsrwz 2, 6 -; P9BE-AIX32-NEXT: vspltb 3, 2, 7 ; P9BE-AIX32-NEXT: xxperm 0, 
2, 1 -; P9BE-AIX32-NEXT: lxsibzx 1, 3, 4 -; P9BE-AIX32-NEXT: lwz 3, L..C5(2) # %const.1 -; P9BE-AIX32-NEXT: lxv 2, 0(3) -; P9BE-AIX32-NEXT: lwz 3, L..C6(2) # %const.2 -; P9BE-AIX32-NEXT: xxperm 2, 1, 2 -; P9BE-AIX32-NEXT: xxspltw 1, 3, 0 -; P9BE-AIX32-NEXT: vmrghh 2, 2, 3 -; P9BE-AIX32-NEXT: xxmrghw 2, 2, 0 +; P9BE-AIX32-NEXT: xxperm 3, 2, 1 +; P9BE-AIX32-NEXT: vspltb 2, 2, 7 +; P9BE-AIX32-NEXT: vmrghh 3, 3, 2 +; P9BE-AIX32-NEXT: xxspltw 1, 2, 0 +; P9BE-AIX32-NEXT: xxmrghw 3, 3, 0 ; P9BE-AIX32-NEXT: lxv 0, 0(3) -; P9BE-AIX32-NEXT: xxperm 2, 1, 0 -; P9BE-AIX32-NEXT: xxspltw 3, 2, 1 -; P9BE-AIX32-NEXT: vadduwm 2, 2, 3 +; P9BE-AIX32-NEXT: xxperm 3, 1, 0 +; P9BE-AIX32-NEXT: xxspltw 2, 3, 1 +; P9BE-AIX32-NEXT: vadduwm 2, 3, 2 ; P9BE-AIX32-NEXT: stxv 2, -16(1) ; P9BE-AIX32-NEXT: lwz 3, -16(1) ; P9BE-AIX32-NEXT: cmpw 3, 5 diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll --- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -195,9 +195,9 @@ ; ; CHECK-LE-P9-LABEL: test_none_v16i8: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: lxv vs0, 0(r4) ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l ; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) ; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1 @@ -215,9 +215,9 @@ ; ; CHECK-BE-P9-LABEL: test_none_v16i8: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtvsrwz v2, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: lxv vs0, 0(r4) ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l ; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) ; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 @@ -234,9 +234,9 @@ ; ; CHECK-AIX-64-P9-LABEL: test_none_v16i8: ; CHECK-AIX-64-P9: # 
%bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 ; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr @@ -443,9 +443,9 @@ ; ; CHECK-LE-P9-LABEL: test_none_v8i16: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: lxv vs0, 0(r4) ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) ; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1 @@ -463,9 +463,9 @@ ; ; CHECK-BE-P9-LABEL: test_none_v8i16: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtvsrwz v2, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: lxv vs0, 0(r4) ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) ; CHECK-BE-P9-NEXT: xxperm v2, vs0, vs1 @@ -482,9 +482,9 @@ ; ; CHECK-AIX-64-P9-LABEL: test_none_v8i16: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r3 ; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) ; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) ; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1 ; CHECK-AIX-64-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -88,12 +88,12 @@ ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsdivsp 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 
0, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32( @@ -362,15 +362,15 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI7_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI7_0@toc@l +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -683,14 +683,14 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xsmulsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsmulsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( @@ -845,14 +845,14 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xsaddsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 
+; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsaddsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( @@ -1007,14 +1007,14 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xssubsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 -; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xssubsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( @@ -1159,10 +1159,10 @@ ; PC64LE9-NEXT: xssqrtsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32( @@ -1422,15 +1422,15 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI32_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 
34, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI32_0@toc@l +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1850,15 +1850,15 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI37_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE9-NEXT: ld 30, 48(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI37_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2231,14 +2231,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI42_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI42_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2582,14 +2582,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, 
.LCPI47_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI47_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2933,14 +2933,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI52_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI52_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3284,14 +3284,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI57_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI57_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded 
Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3635,14 +3635,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI62_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI62_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3986,14 +3986,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI67_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI67_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4337,14 +4337,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI72_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; 
PC64LE9-NEXT: addi 3, 3, .LCPI72_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4599,10 +4599,10 @@ ; PC64LE9-NEXT: xsrdpic 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( @@ -4827,14 +4827,14 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI82_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 3, 3, .LCPI82_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5152,15 +5152,15 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI87_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI87_0@toc@l -; 
PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI87_0@toc@l +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5394,15 +5394,15 @@ ; PC64LE9-NEXT: xscvdpspn 0, 1 ; PC64LE9-NEXT: xscvdpspn 1, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI92_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 ; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI92_0@toc@l -; PC64LE9-NEXT: xxmrghw 34, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 31 -; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: addi 3, 3, .LCPI92_0@toc@l +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5587,23 +5587,22 @@ ; PC64LE9-NEXT: xxswapd 1, 34 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xxsldwi 2, 34, 34, 1 -; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvdpsxws 0, 0 ; PC64LE9-NEXT: xscvdpsxws 1, 1 -; PC64LE9-NEXT: xscvdpsxws 2, 2 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l -; PC64LE9-NEXT: xxmrghw 0, 1, 0 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: mffprwz 3, 2 -; PC64LE9-NEXT: mtfprwz 2, 3 -; PC64LE9-NEXT: xxperm 0, 2, 1 
-; PC64LE9-NEXT: xxlor 34, 0, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xscvspdpn 1, 1 +; PC64LE9-NEXT: xscvdpsxws 1, 1 +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32( @@ -5856,13 +5855,13 @@ ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha -; PC64LE9-NEXT: xxmrghw 34, 1, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: xscvdpsxws 1, 3 ; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l ; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( @@ -6084,23 +6083,22 @@ ; PC64LE9-NEXT: xxswapd 1, 34 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvspdpn 1, 1 -; PC64LE9-NEXT: xxsldwi 2, 34, 34, 1 -; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvdpuxws 0, 0 ; PC64LE9-NEXT: xscvdpuxws 1, 1 -; PC64LE9-NEXT: xscvdpuxws 2, 2 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: mtfprwz 0, 3 ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l -; PC64LE9-NEXT: xxmrghw 0, 1, 0 -; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: mffprwz 3, 2 -; PC64LE9-NEXT: mtfprwz 2, 3 -; PC64LE9-NEXT: xxperm 0, 2, 1 -; PC64LE9-NEXT: xxlor 34, 0, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xscvspdpn 1, 1 +; PC64LE9-NEXT: xscvdpuxws 1, 1 +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( @@ -6352,13 +6350,13 @@ ; PC64LE9-NEXT: mffprwz 3, 1 ; PC64LE9-NEXT: mtfprwz 
1, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha -; PC64LE9-NEXT: xxmrghw 34, 1, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: xscvdpuxws 1, 3 ; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l ; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mffprwz 3, 1 -; PC64LE9-NEXT: mtfprwz 1, 3 -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: mtvsrwz 34, 3 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( @@ -6563,11 +6561,11 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 34, 1, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: xsrsp 1, 3 ; PC64LE9-NEXT: lxv 0, 0(3) -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xscvdpspn 34, 1 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( @@ -6776,10 +6774,10 @@ ; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( @@ -6892,10 +6890,10 @@ ; PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( @@ -7007,10 +7005,10 @@ ; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 
-; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( @@ -7123,10 +7121,10 @@ ; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: xscvdpspn 2, 2 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxmrghw 34, 1, 2 +; PC64LE9-NEXT: xscvdpspn 34, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 2 ; PC64LE9-NEXT: lxv 1, 0(3) -; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: xxperm 34, 35, 1 ; PC64LE9-NEXT: blr entry: %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( @@ -7481,14 +7479,13 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 0, 1, 0 -; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwa 2, 3 -; PC64LE9-NEXT: xscvsxdsp 2, 2 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxperm 0, 2, 1 -; PC64LE9-NEXT: xxlor 34, 0, 0 +; PC64LE9-NEXT: mtfprwa 1, 3 +; PC64LE9-NEXT: xscvsxdsp 1, 1 +; PC64LE9-NEXT: xscvdpspn 34, 1 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7556,12 +7553,12 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 34, 1, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: mtfprd 1, 5 ; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvsxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xscvdpspn 34, 1 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -8048,14 +8045,13 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 0, 1, 0 -; 
PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwz 2, 3 -; PC64LE9-NEXT: xscvuxdsp 2, 2 -; PC64LE9-NEXT: xscvdpspn 2, 2 -; PC64LE9-NEXT: xxperm 0, 2, 1 -; PC64LE9-NEXT: xxlor 34, 0, 0 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xscvuxdsp 1, 1 +; PC64LE9-NEXT: xscvdpspn 34, 1 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -8123,12 +8119,12 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI181_0@toc@l ; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxmrghw 34, 1, 0 +; PC64LE9-NEXT: xxmrghw 35, 1, 0 ; PC64LE9-NEXT: mtfprd 1, 5 ; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: xscvuxdsp 1, 1 -; PC64LE9-NEXT: xscvdpspn 1, 1 -; PC64LE9-NEXT: xxperm 34, 1, 0 +; PC64LE9-NEXT: xscvdpspn 34, 1 +; PC64LE9-NEXT: xxperm 34, 35, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> diff --git a/llvm/test/CodeGen/PowerPC/xxperm-swap.ll b/llvm/test/CodeGen/PowerPC/xxperm-swap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/xxperm-swap.ll @@ -0,0 +1,96 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable +define 
dso_local noundef <16 x i8> @test1(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 { +; CHECK-LE-P9-LABEL: test1: +; CHECK-LE-P9: %bb.0: +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: xxperm v4, v3, vs0 +; CHECK-LE-P9-NEXT: vaddubm v2, v4, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P9-LABEL: test1: +; CHECK-BE-P9: %bb.0: +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v4, v3, vs0 +; CHECK-BE-P9-NEXT: vaddubm v2, v4, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test1: +; CHECK-AIX-64-P9: %bb.0: +; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v4, v3, vs0 +; CHECK-AIX-64-P9-NEXT: vaddubm v2, v4, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test1: +; CHECK-AIX-32-P9: %bb.0: +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) +; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v4, v3, vs0 +; CHECK-AIX-32-P9-NEXT: vaddubm v2, v4, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %add = add <16 x i8> %shuffle, %a + ret <16 x i8> %add +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable +define dso_local noundef <16 x i8> @test2(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 { +; CHECK-LE-P9-LABEL: test2: +; CHECK-LE-P9: %bb.0: +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: xxperm v3, v4, vs0 +; CHECK-LE-P9-NEXT: vaddubm v2, v3, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P9-LABEL: test2: +; CHECK-BE-P9: %bb.0: +; CHECK-BE-P9-NEXT: addis r3, r2, 
.LCPI1_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v3, v4, vs0 +; CHECK-BE-P9-NEXT: vaddubm v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test2: +; CHECK-AIX-64-P9: %bb.0: +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxperm v3, v4, vs0 +; CHECK-AIX-64-P9-NEXT: vaddubm v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test2: +; CHECK-AIX-32-P9: %bb.0: +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) +; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxperm v3, v4, vs0 +; CHECK-AIX-32-P9-NEXT: vaddubm v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %add = add <16 x i8> %shuffle, %b + ret <16 x i8> %add +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-privileged,-rop-protect,-spe" }