Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4050,17 +4050,22 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { + SDValue LHS = N->getOperand(IsStrict ? 1 : 0); + SDValue RHS = N->getOperand(IsStrict ? 2 : 1); + + if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. // setcc op, 0 if (Imm == 0) { - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: { @@ -4095,7 +4100,7 @@ } } } else if (Imm == ~0U) { // setcc op, -1 - SDValue Op = N->getOperand(0); + SDValue Op = LHS; switch (CC) { default: break; case ISD::SETEQ: @@ -4138,12 +4143,9 @@ } } - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. - if (LHS.getValueType().isVector()) { + if (!IsStrict && LHS.getValueType().isVector()) { if (Subtarget->hasSPE()) return false; @@ -4193,14 +4195,21 @@ SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; if (!Inv) { - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + if (IsStrict) + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, MVT::Other, Ops); + else + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // Get the specified bit. 
SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); + if (IsStrict) + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, MVT::Other, Tmp, + getI32Imm(1, dl)); + else + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); return true; } @@ -4664,6 +4673,8 @@ break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: if (trySETCC(N)) return; break; Index: llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr9 -O0 | FileCheck %s + +define i32 @une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: une_ppcf128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr0, f1, f3 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 +; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: and r3, r4, r3 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 +; CHECK-NEXT: xscmpudp cr7, f2, f4 +; CHECK-NEXT: mfocrf r5, 1 +; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: and r4, r4, r5 +; CHECK-NEXT: or r3, r3, r4 +; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128 %a, ppc_fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + +; This is a different branch from une +define i32 
@ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: ogt_ppcf128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr0, f1, f3 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xscmpudp cr7, f2, f4 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r4, 1 +; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: mcrf cr7, cr0 +; CHECK-NEXT: mfocrf r5, 1 +; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: and r4, r5, r4 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: # kill: def $r4 killed $r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128 %a, ppc_fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %1 = zext i1 %0 to i32 + ret i32 %1 +} + +define i1 @test_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: test_f128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp cr7, v2, v3 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 + ret i1 %0 +} + +define i1 @testbr_f64(double %a, double %b) #0 { +; CHECK-LABEL: testbr_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpudp cr7, f1, f2 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne cr0, .LBB3_2 +; CHECK-NEXT: b .LBB3_1 +; CHECK-NEXT: .LBB3_1: # %tr +; CHECK-NEXT: li r3, -1 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_2: # %fl +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata 
!"fpexcept.strict") #0 + br i1 %0, label %tr, label %fl +tr: + ret i1 true +fl: + ret i1 false +} + +define i1 @testbr_f32(float %a, float %b) #0 { +; CHECK-LABEL: testbr_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu cr7, f1, f2 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: bne cr0, .LBB4_2 +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_1: # %tr +; CHECK-NEXT: li r3, -1 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_2: # %fl +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr +entry: + %0 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 + br i1 %0, label %tr, label %fl +tr: + ret i1 true +fl: + ret i1 false +} + +declare i1 @llvm.experimental.constrained.fcmp.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +attributes #0 = { strictfp }