diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2789,6 +2789,12 @@ return false; } + /// Return true if this target requires the high-order bits of a register + /// passed to a library function to be zero (so the masking must be kept). + virtual bool shouldKeepZExtForLibCall() const { + return false; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21174,7 +21174,7 @@ SDValue N0 = N->getOperand(0); // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) - if (N0->getOpcode() == ISD::AND) { + if (!TLI.shouldKeepZExtForLibCall() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -987,6 +987,11 @@ shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + // Keep the zero-extensions for arguments to libcalls. + bool shouldKeepZExtForLibCall() const override { + return true; + } + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. 
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr49092.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-P9 + +define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: addi r3, r3, 11 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: test2: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: add r3, r4, r3 +; CHECK-P9-NEXT: addi r3, r3, 11 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xscvhpdp f1, f0 +; CHECK-P9-NEXT: blr +entry: + %add = add i64 %b, %a + %0 = trunc i64 %add to i16 + %conv = add i16 %0, 11 + %call = bitcast i16 %conv to half + ret half %call +} +attributes #0 = { nounwind }