Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,9 +357,8 @@ /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. - // Flag bits can only be masked out during intersection if the masking flags - // are defined. + // This bit is used to determine if the flags are in a defined state. It is + // only used by SelectionDAGBuilder. bool AnyDefined : 1; bool NoUnsignedWrap : 1; @@ -464,11 +463,9 @@ bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } - /// Clear any flags in this flag set that aren't also set in Flags. - /// If the given Flags are undefined then don't do anything. + /// Clear any flags in this flag set that aren't also set in Flags. All + /// flags will be cleared if Flags are undefined. void intersectWith(const SDNodeFlags Flags) { - if (!Flags.isDefined()) - return; NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1128,6 +1128,8 @@ // TODO: We could handle all flags (nsw, etc) here. // TODO: If an IR instruction maps to >1 node, only the final node will have // flags set. + // TODO: The handling of flags should be improved, see + // https://reviews.llvm.org/D86871 if (SDNode *Node = getNodeForIRValue(&I)) { SDNodeFlags IncomingFlags; IncomingFlags.copyFMF(*FPMO); Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -521,8 +521,8 @@ return true; // TODO: Move into isKnownNeverNaN - if (N->getFlags().isDefined()) - return N->getFlags().hasNoNaNs(); + if (N->getFlags().hasNoNaNs()) + return true; return CurDAG->isKnownNeverNaN(N); } Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -150,8 +150,8 @@ return true; const auto Flags = Op.getNode()->getFlags(); - if (Flags.isDefined()) - return Flags.hasNoSignedZeros(); + if (Flags.hasNoSignedZeros()) + return true; return false; } Index: llvm/test/CodeGen/SystemZ/fp-mul-14.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Check that a multiply-and-add results. + +; FIXME: This test is xfailed temporarily +; XFAIL: * + +define void @f1(float %arg, float* %Dst) { +; CHECK-LABEL: f1: +; CHECK: maeb +bb: + %i = fmul contract float %arg, 0xBE6777A5C0000000 + %i4 = fadd contract float %i, 1.000000e+00 + %i5 = fmul contract float %arg, 0xBE6777A5C0000000 + %i6 = fadd contract float %i5, 1.000000e+00 + %i7 = fmul contract float %i4, 2.000000e+00 + store float %i7, float* %Dst + ret void +} + Index: llvm/test/CodeGen/SystemZ/int-cmp-60.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/int-cmp-60.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that DAGCombiner properly clears the NUW/NSW flags on the memoized add +; node. + +define void @fun(i64* %Src, i32* %Dst) { +; CHECK-LABEL: fun: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iilf %r0, 1303940520 +; CHECK-NEXT: n %r0, 4(%r2) +; CHECK-NEXT: lr %r1, %r0 +; CHECK-NEXT: afi %r1, 1628135358 +; CHECK-NEXT: locrnhe %r1, %r0 +; CHECK-NEXT: st %r1, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %0 = load i64, i64* %Src, align 8 + %1 = trunc i64 %0 to i32 + %conv = and i32 %1, 1303940520 + %xor11.i = or i32 %conv, -2147483648 + %xor2.i = add i32 %xor11.i, -519348290 + %cmp.i = icmp slt i32 %xor2.i, 0 + %sub3.i = add nuw nsw i32 %conv, 1628135358 + %cond.i = select i1 %cmp.i, i32 %conv, i32 %sub3.i + store i32 %cond.i, i32* %Dst + ret void +} +