diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -470,6 +470,7 @@
     SDValue visitFADD(SDNode *N);
     SDValue visitSTRICT_FADD(SDNode *N);
     SDValue visitFSUB(SDNode *N);
+    SDValue visitSTRICT_FSUB(SDNode *N);
     SDValue visitFMUL(SDNode *N);
     SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
@@ -1691,6 +1692,7 @@
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
+  case ISD::STRICT_FSUB:        return visitSTRICT_FSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
@@ -13397,6 +13399,24 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSTRICT_FSUB(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N0 = N->getOperand(1);
+  SDValue N1 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  EVT ChainVT = N->getValueType(1);
+  SDLoc DL(N);
+  const SDNodeFlags Flags = N->getFlags();
+
+  // fold (strict_fsub A, (fneg B)) -> (strict_fadd A, B)
+  if (SDValue NegN1 =
+          TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+    return DAG.getNode(ISD::STRICT_FADD, DL, DAG.getVTList(VT, ChainVT),
+                       {Chain, N0, NegN1}, Flags);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -1059,22 +1059,24 @@
 }
 
 define <1 x float> @constrained_vector_fsub_v1f32() #0 {
+; PC64LE: # float -1
 ; PC64LE-LABEL: constrained_vector_fsub_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
 ; PC64LE-NEXT:    addis 4, 2, 
.LCPI20_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI20_1@toc@l(4) -; PC64LE-NEXT: xssubsp 1, 1, 0 +; PC64LE-NEXT: xsaddsp 1, 1, 0 ; PC64LE-NEXT: blr ; +; PC64LE9: # float -1 ; PC64LE9-LABEL: constrained_vector_fsub_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI20_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI20_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI20_1@toc@l(3) -; PC64LE9-NEXT: xssubsp 1, 1, 0 +; PC64LE9-NEXT: xsaddsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( @@ -1086,6 +1088,8 @@ } define <2 x double> @constrained_vector_fsub_v2f64() #0 { +; PC64LE: # double -1 +; PC64LE: # double -0.10000000000000001 ; PC64LE-LABEL: constrained_vector_fsub_v2f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI21_0@toc@ha @@ -1096,9 +1100,11 @@ ; PC64LE-NEXT: lxvd2x 1, 0, 4 ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvsubdp 34, 1, 0 +; PC64LE-NEXT: xvadddp 34, 1, 0 ; PC64LE-NEXT: blr ; +; PC64LE9: # double -1 +; PC64LE9: # double -0.10000000000000001 ; PC64LE9-LABEL: constrained_vector_fsub_v2f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI21_0@toc@ha @@ -1107,7 +1113,7 @@ ; PC64LE9-NEXT: addis 3, 2, .LCPI21_1@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI21_1@toc@l ; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvsubdp 34, 1, 0 +; PC64LE9-NEXT: xvadddp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -1119,21 +1125,25 @@ } define <3 x float> @constrained_vector_fsub_v3f32() #0 { +; PC64LE: # float -0 +; PC64LE: # float -1 +; PC64LE: # float -2 ; PC64LE-LABEL: constrained_vector_fsub_v3f32: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: addis 3, 2, .LCPI22_1@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI22_3@toc@ha +; PC64LE-NEXT: addis 5, 2, .LCPI22_2@toc@ha +; PC64LE-NEXT: lfs 0, .LCPI22_1@toc@l(3) +; PC64LE-NEXT: lfs 1, 
.LCPI22_3@toc@l(4) +; PC64LE-NEXT: lfs 2, .LCPI22_2@toc@l(5) ; PC64LE-NEXT: addis 3, 2, .LCPI22_0@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI22_2@toc@ha -; PC64LE-NEXT: addis 5, 2, .LCPI22_1@toc@ha -; PC64LE-NEXT: xxlxor 3, 3, 3 -; PC64LE-NEXT: lfs 0, .LCPI22_0@toc@l(3) -; PC64LE-NEXT: lfs 1, .LCPI22_2@toc@l(4) -; PC64LE-NEXT: lfs 2, .LCPI22_1@toc@l(5) -; PC64LE-NEXT: addis 3, 2, .LCPI22_3@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI22_3@toc@l -; PC64LE-NEXT: xssubsp 1, 0, 1 +; PC64LE-NEXT: xsaddsp 1, 0, 1 +; PC64LE-NEXT: lfs 3, .LCPI22_0@toc@l(3) +; PC64LE-NEXT: addis 3, 2, .LCPI22_4@toc@ha +; PC64LE-NEXT: xsaddsp 2, 0, 2 +; PC64LE-NEXT: addi 3, 3, .LCPI22_4@toc@l ; PC64LE-NEXT: lvx 4, 0, 3 -; PC64LE-NEXT: xssubsp 2, 0, 2 -; PC64LE-NEXT: xssubsp 0, 0, 3 +; PC64LE-NEXT: xsaddsp 0, 0, 3 ; PC64LE-NEXT: xscvdpspn 1, 1 ; PC64LE-NEXT: xscvdpspn 2, 2 ; PC64LE-NEXT: xscvdpspn 0, 0 @@ -1144,26 +1154,30 @@ ; PC64LE-NEXT: vperm 2, 3, 2, 4 ; PC64LE-NEXT: blr ; +; PC64LE9: # float -0 +; PC64LE9: # float -1 +; PC64LE9: # float -2 ; PC64LE9-LABEL: constrained_vector_fsub_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI22_0@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfs 0, .LCPI22_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI22_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI22_1@toc@l(3) +; PC64LE9-NEXT: lfs 1, .LCPI22_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI22_2@toc@ha -; PC64LE9-NEXT: lfs 3, .LCPI22_2@toc@l(3) +; PC64LE9-NEXT: xsaddsp 0, 1, 0 +; PC64LE9-NEXT: lfs 2, .LCPI22_2@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI22_3@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI22_3@toc@l -; PC64LE9-NEXT: xssubsp 1, 0, 1 +; PC64LE9-NEXT: lfs 3, .LCPI22_3@toc@l(3) +; PC64LE9-NEXT: addis 3, 2, .LCPI22_4@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI22_4@toc@l ; PC64LE9-NEXT: lxvx 36, 0, 3 -; PC64LE9-NEXT: xssubsp 2, 0, 2 -; PC64LE9-NEXT: xssubsp 0, 0, 3 +; PC64LE9-NEXT: xsaddsp 2, 1, 2 +; PC64LE9-NEXT: xsaddsp 1, 1, 3 ; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 
3 -; PC64LE9-NEXT: xscvdpspn 0, 2 -; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 -; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: xxsldwi 34, 1, 1, 3 +; PC64LE9-NEXT: xscvdpspn 1, 2 +; PC64LE9-NEXT: xxsldwi 35, 1, 1, 3 ; PC64LE9-NEXT: vmrghw 2, 3, 2 ; PC64LE9-NEXT: xxsldwi 35, 0, 0, 3 ; PC64LE9-NEXT: vperm 2, 3, 2, 4 @@ -1179,42 +1193,48 @@ } define <3 x double> @constrained_vector_fsub_v3f64() #0 { +; PC64LE: # double -2 +; PC64LE: # double -1 ; PC64LE-LABEL: constrained_vector_fsub_v3f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: addis 3, 2, .LCPI23_1@toc@ha -; PC64LE-NEXT: addis 4, 2, .LCPI23_2@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI23_1@toc@l -; PC64LE-NEXT: addi 4, 4, .LCPI23_2@toc@l +; PC64LE-NEXT: addis 3, 2, .LCPI23_2@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI23_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI23_2@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI23_3@toc@l ; PC64LE-NEXT: lxvd2x 0, 0, 3 ; PC64LE-NEXT: lxvd2x 1, 0, 4 ; PC64LE-NEXT: addis 3, 2, .LCPI23_0@toc@ha +; PC64LE-NEXT: addis 4, 2, .LCPI23_1@toc@ha ; PC64LE-NEXT: lfd 3, .LCPI23_0@toc@l(3) ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 -; PC64LE-NEXT: xvsubdp 2, 1, 0 -; PC64LE-NEXT: xxlxor 0, 0, 0 -; PC64LE-NEXT: xssubdp 3, 3, 0 +; PC64LE-NEXT: xvadddp 2, 1, 0 +; PC64LE-NEXT: lfs 0, .LCPI23_1@toc@l(4) +; PC64LE-NEXT: xsadddp 3, 3, 0 ; PC64LE-NEXT: xxswapd 1, 2 -; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 -; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; PC64LE-NEXT: blr ; +; PC64LE9: # double -2 +; PC64LE9: # double -1 ; PC64LE9-LABEL: constrained_vector_fsub_v3f64: -; PC64LE9: # %bb.0: # %entry +; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI23_0@toc@ha -; PC64LE9-NEXT: xxlxor 1, 1, 1 ; PC64LE9-NEXT: lfd 0, .LCPI23_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI23_1@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI23_1@toc@l -; PC64LE9-NEXT: 
xssubdp 3, 0, 1 -; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: lfs 1, .LCPI23_1@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI23_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI23_2@toc@l +; PC64LE9-NEXT: xsadddp 3, 0, 1 +; PC64LE9-NEXT: lxvx 0, 0, 3 +; PC64LE9-NEXT: addis 3, 2, .LCPI23_3@toc@ha +; PC64LE9-NEXT: addi 3, 3, .LCPI23_3@toc@l ; PC64LE9-NEXT: lxvx 1, 0, 3 -; PC64LE9-NEXT: xvsubdp 2, 1, 0 +; PC64LE9-NEXT: xvadddp 2, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 2 -; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 -; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 ; PC64LE9-NEXT: blr entry: %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64( @@ -1227,6 +1247,10 @@ } define <4 x double> @constrained_vector_fsub_v4f64() #0 { +; PC64LE: # double -2 +; PC64LE: # double -0.20000000000000001 +; PC64LE: # double -1 +; PC64LE: # double -0.10000000000000001 ; PC64LE-LABEL: constrained_vector_fsub_v4f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI24_0@toc@ha @@ -1241,10 +1265,14 @@ ; PC64LE-NEXT: xxswapd 0, 0 ; PC64LE-NEXT: xxswapd 1, 1 ; PC64LE-NEXT: xxswapd 2, 2 -; PC64LE-NEXT: xvsubdp 35, 1, 0 -; PC64LE-NEXT: xvsubdp 34, 1, 2 +; PC64LE-NEXT: xvadddp 35, 1, 0 +; PC64LE-NEXT: xvadddp 34, 1, 2 ; PC64LE-NEXT: blr ; +; PC64LE9: # double -2 +; PC64LE9: # double -0.20000000000000001 +; PC64LE9: # double -1 +; PC64LE9: # double -0.10000000000000001 ; PC64LE9-LABEL: constrained_vector_fsub_v4f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI24_0@toc@ha @@ -1255,9 +1283,9 @@ ; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI24_2@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI24_2@toc@l -; PC64LE9-NEXT: xvsubdp 35, 1, 0 +; PC64LE9-NEXT: xvadddp 35, 1, 0 ; PC64LE9-NEXT: lxvx 0, 0, 3 -; PC64LE9-NEXT: xvsubdp 34, 1, 0 +; PC64LE9-NEXT: xvadddp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <4 x double> 
@llvm.experimental.constrained.fsub.v4f64( diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -909,15 +909,15 @@ ; S390X-NEXT: larl %r1, .LCPI20_0 ; S390X-NEXT: le %f0, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI20_1 -; S390X-NEXT: seb %f0, 0(%r1) +; S390X-NEXT: aeb %f0, 0(%r1) ; S390X-NEXT: br %r14 ; ; SZ13-LABEL: constrained_vector_fsub_v1f32: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vgmf %v0, 2, 8 -; SZ13-NEXT: vgmf %v1, 1, 8 -; SZ13-NEXT: sebr %f1, %f0 -; SZ13-NEXT: vlr %v24, %v1 +; SZ13-NEXT: larl %r1, .LCPI20_0 +; SZ13-NEXT: vgmf %v0, 1, 8 +; SZ13-NEXT: aeb %f0, 0(%r1) +; SZ13-NEXT: vlr %v24, %v0 ; SZ13-NEXT: br %r14 entry: %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( @@ -932,13 +932,13 @@ ; S390X-LABEL: constrained_vector_fsub_v2f64: ; S390X: # %bb.0: # %entry ; S390X-NEXT: larl %r1, .LCPI21_0 -; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI21_2 -; S390X-NEXT: ldeb %f1, 0(%r1) +; S390X-NEXT: ldeb %f0, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI21_1 -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: sdb %f2, 0(%r1) -; S390X-NEXT: sdbr %f0, %f1 +; S390X-NEXT: ldr %f2, %f1 +; S390X-NEXT: adb %f2, 0(%r1) +; S390X-NEXT: adbr %f0, %f1 ; S390X-NEXT: br %r14 ; ; SZ13-LABEL: constrained_vector_fsub_v2f64: @@ -946,7 +946,7 @@ ; SZ13-NEXT: larl %r1, .LCPI21_0 ; SZ13-NEXT: vl %v0, 0(%r1), 3 ; SZ13-NEXT: vgmg %v1, 12, 10 -; SZ13-NEXT: vfsdb %v24, %v1, %v0 +; SZ13-NEXT: vfadb %v24, %v1, %v0 ; SZ13-NEXT: br %r14 entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -962,31 +962,31 @@ ; S390X: # %bb.0: # %entry ; S390X-NEXT: larl %r1, .LCPI22_0 ; S390X-NEXT: le %f0, 0(%r1) -; S390X-NEXT: ler %f4, %f0 +; S390X-NEXT: lzer %f1 +; S390X-NEXT: lcdfr %f4, %f1 +; S390X-NEXT: aebr %f4, %f0 ; 
S390X-NEXT: larl %r1, .LCPI22_1 ; S390X-NEXT: ler %f2, %f0 -; S390X-NEXT: seb %f2, 0(%r1) +; S390X-NEXT: aeb %f2, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI22_2 -; S390X-NEXT: seb %f0, 0(%r1) -; S390X-NEXT: lzer %f1 -; S390X-NEXT: sebr %f4, %f1 +; S390X-NEXT: aeb %f0, 0(%r1) ; S390X-NEXT: br %r14 ; ; SZ13-LABEL: constrained_vector_fsub_v3f32: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vgbm %v2, 61440 -; SZ13-NEXT: lzer %f1 -; SZ13-NEXT: sebr %f2, %f1 -; SZ13-NEXT: vgmf %v1, 1, 1 -; SZ13-NEXT: vgbm %v3, 61440 -; SZ13-NEXT: vgbm %v0, 61440 -; SZ13-NEXT: sebr %f3, %f1 -; SZ13-NEXT: vgmf %v1, 2, 8 -; SZ13-NEXT: sebr %f0, %f1 -; SZ13-NEXT: vmrhf %v0, %v3, %v0 -; SZ13-NEXT: vrepf %v1, %v2, 0 -; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lzer %f0 +; SZ13-NEXT: vgbm %v1, 61440 +; SZ13-NEXT: vgmf %v2, 0, 1 +; SZ13-NEXT: lcdfr %f0, %f0 +; SZ13-NEXT: larl %r1, .LCPI22_0 +; SZ13-NEXT: aebr %f0, %f1 +; SZ13-NEXT: aebr %f2, %f1 +; SZ13-NEXT: aeb %f1, 0(%r1) +; SZ13-NEXT: vmrhf %v1, %v2, %v1 +; SZ13-NEXT: vrepf %v0, %v0, 0 +; SZ13-NEXT: vmrhg %v24, %v1, %v0 ; SZ13-NEXT: br %r14 + entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( <3 x float> , <3 x double>* %a %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64( @@ -1038,20 +1038,19 @@ ; S390X-LABEL: constrained_vector_fsub_v4f64: ; S390X: # %bb.0: # %entry ; S390X-NEXT: larl %r1, .LCPI24_0 -; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI24_1 -; S390X-NEXT: ldr %f6, %f0 -; S390X-NEXT: sdb %f6, 0(%r1) +; S390X-NEXT: ldr %f2, %f1 +; S390X-NEXT: ldr %f6, %f1 +; S390X-NEXT: adb %f6, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI24_2 -; S390X-NEXT: ldeb %f1, 0(%r1) +; S390X-NEXT: ldeb %f4, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI24_4 -; S390X-NEXT: ldeb %f3, 0(%r1) +; S390X-NEXT: ldeb %f0, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI24_3 -; S390X-NEXT: ldr %f2, %f0 -; S390X-NEXT: sdb %f2, 0(%r1) -; S390X-NEXT: ldr %f4, %f0 -; S390X-NEXT: sdbr %f4, %f1 -; S390X-NEXT: sdbr %f0, %f3 +; 
S390X-NEXT: adb %f2, 0(%r1)
+; S390X-NEXT:    adbr %f4, %f1
+; S390X-NEXT:    adbr %f0, %f1
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_fsub_v4f64:
@@ -1060,9 +1059,9 @@
 ; SZ13-NEXT:    vl %v0, 0(%r1), 3
 ; SZ13-NEXT:    vgmg %v1, 12, 10
 ; SZ13-NEXT:    larl %r1, .LCPI24_1
-; SZ13-NEXT:    vfsdb %v26, %v1, %v0
+; SZ13-NEXT:    vfadb %v26, %v1, %v0
 ; SZ13-NEXT:    vl %v0, 0(%r1), 3
-; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
 ; SZ13-NEXT:    br %r14
 entry:
   %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -67,7 +67,8 @@
 ; X87-LABEL: f2:
 ; X87:       # %bb.0: # %entry
 ; X87-NEXT:    fldz
-; X87-NEXT:    fsubrl {{[0-9]+}}(%esp)
+; X87-NEXT:    fchs
+; X87-NEXT:    faddl 4(%esp)
 ; X87-NEXT:    wait
 ; X87-NEXT:    retl
 ;
@@ -76,27 +77,24 @@
 ; X86-SSE-NEXT:    subl $12, %esp
 ; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
 ; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT:    xorpd %xmm1, %xmm1
-; X86-SSE-NEXT:    subsd %xmm1, %xmm0
-; X86-SSE-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addsd .LCPI1_0, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
 ; X86-SSE-NEXT:    wait
-; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    addl $12, %esp
 ; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE-NEXT:    retl
 ;
 ; SSE-LABEL: f2:
 ; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    xorpd %xmm1, %xmm1
-; SSE-NEXT:    subsd %xmm1, %xmm0
+; SSE-NEXT:    addsd .LCPI1_0(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: f2:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddsd .LCPI1_0(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %sub = call double @llvm.experimental.constrained.fsub.f64(
                                               double %a,
                                               double 0.000000e+00,
diff --git a/llvm/test/CodeGen/X86/strict-fsub-combines.ll 
b/llvm/test/CodeGen/X86/strict-fsub-combines.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strict-fsub-combines.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define float @fneg_strict_fsub_to_strict_fadd(float %x, float %y) strictfp {
+  ; CHECK: addss %{{.*}}, %{{.*}}
+  %neg = fneg float %y
+  %sub = call float @llvm.experimental.constrained.fsub.f32(float %x, float %neg, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  ret float %sub
+}
+
+define double @fneg_strict_fsub_to_strict_fadd_d(double %x, double %y) strictfp {
+  ; CHECK: addsd %{{.*}}, %{{.*}}
+  %neg = fneg double %y
+  %sub = call double @llvm.experimental.constrained.fsub.f64(double %x, double %neg, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  ret double %sub
+}
+
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -677,16 +677,18 @@
 }
 
 define <1 x float> @constrained_vector_fsub_v1f32() #0 {
+; CHECK: # float -1
 ; CHECK-LABEL: constrained_vector_fsub_v1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    subss {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
+; AVX: # float -1
 ; AVX-LABEL: constrained_vector_fsub_v1f32:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT:    vsubss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
@@ -698,16 +700,20 @@
 }
 
 define <2 x double> @constrained_vector_fsub_v2f64() #0 {
+; CHECK: # 
double -1 +; CHECK: # double -0.10000000000000001 ; CHECK-LABEL: constrained_vector_fsub_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; +; AVX: # double -1 +; AVX: # double -0.10000000000000001 ; AVX-LABEL: constrained_vector_fsub_v2f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -719,28 +725,29 @@ } define <3 x float> @constrained_vector_fsub_v3f32() #0 { +; CHECK: # float -0 +; CHECK: # float -2 +; CHECK: # float -1 ; CHECK-LABEL: constrained_vector_fsub_v3f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: movaps %xmm1, %xmm2 -; CHECK-NEXT: subss %xmm0, %xmm2 -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: subss {{.*}}(%rip), %xmm0 -; CHECK-NEXT: subss {{.*}}(%rip), %xmm1 +; CHECK: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm1, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: addss {{.*}}(%rip), %xmm1 ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v3f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vsubss {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vinsertps {{.*#+}} xmm1 = 
xmm2[0],xmm1[0],xmm2[2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] +; AVX: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm2 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX-NEXT: retq entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( @@ -753,13 +760,15 @@ } define <3 x double> @constrained_vector_fsub_v3f64() #0 { +; CHECK: # double -2 +; CHECK: # double -1 +; CHECK: # double -0 ; CHECK-LABEL: constrained_vector_fsub_v3f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK: # %bb.0: # %entry +; CHECK: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 ; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] @@ -769,11 +778,10 @@ ; ; AVX-LABEL: constrained_vector_fsub_v3f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -787,24 +795,28 @@ } define <4 x double> @constrained_vector_fsub_v4f64() #0 { +; CHECK: # double -2 +; CHECK: # double 
-0.20000000000000001 +; CHECK: # double -1 +; CHECK: # double -0.10000000000000001 ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-2.0E+0,-2.0000000000000001E-1] +; CHECK-NEXT: addpd %xmm0, %xmm1 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fsub_v4f64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fsub_v4f64: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] -; AVX512-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 ; AVX512-NEXT: retq entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(