diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -677,6 +677,22 @@
       }
     }
 
+    // Without ISA 3.0, an fptrunc/fpext to or from half is expected to be
+    // lowered to a library call (pr48519.ll shows fpext from half becoming
+    // `bl __gnu_h2f_ieee`), so report the instruction here just like the
+    // other cases in this function that return true.
+    if (!ST->isISA3_0()) {
+      switch (J->getOpcode()) {
+      case Instruction::FPTrunc:
+      case Instruction::FPExt: {
+        CastInst *CI = cast<CastInst>(J);
+        if (CI->getSrcTy()->getScalarType()->isHalfTy() ||
+            CI->getDestTy()->getScalarType()->isHalfTy())
+          return true;
+      }
+      }
+    }
+
     for (Value *Operand : J->operands())
       if (memAddrUsesCTR(Operand, TM, Visited))
         return true;
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -49,6 +49,56 @@
   unreachable
 }
 
+define void @julia__hypot_17() {
+; CHECK-LABEL: julia__hypot_17:
+; CHECK:       # %bb.0: # %top
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    li r30, 3
+; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:  .LBB1_1: # %L57
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi r30, r30, -1
+; CHECK-NEXT:    cmpldi r30, 0
+; CHECK-NEXT:    beq cr0, .LBB1_3
+; CHECK-NEXT:  # %bb.2: # %L68
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lhz r3, 0(0)
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fcmpu cr0, f1, f1
+; CHECK-NEXT:    bun cr0, .LBB1_1
+; CHECK-NEXT:  .LBB1_3: # %L78
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+top:
+  br label %L57
+
+L57: ; preds = %L68, %top
+  %value_phi117 = phi i64 [ %0, %L68 ], [ 2, %top ]
+  %exitcond = icmp eq i64 %value_phi117, 4
+  br i1 %exitcond, label %L78, label %L68
+
+L68: ; preds = %L57
+  %0 = add nuw nsw i64 %value_phi117, 1
+  %1 = load half, half* null, align 2
+  %2 = fpext half %1 to float
+  %3 = fcmp uno float %2, 0.000000e+00
+  %4 = or i1 %3, false
+  br i1 %4, label %L57, label %L78
+
+L78: ; preds = %L68, %L57
+  ret void
+}
+
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0
 