Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6949,7 +6949,8 @@
   return SDValue();
 }
 
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
   // First try to optimize away the conversion when it's conditionally from
   // a constant. Vectors only.
   SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6968,7 +6969,7 @@
   // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead.
   // This eliminates an "integer-to-vector-move UOP and improve throughput.
   SDValue N0 = N->getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       // Do not change the width of a volatile load.
       !cast<LoadSDNode>(N0)->isVolatile()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -8525,7 +8526,7 @@
     return performMulCombine(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
-    return performIntToFpCombine(N, DAG);
+    return performIntToFpCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
Index: test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
@@ -0,0 +1,16 @@
+;RUN: llc <%s -mattr=-neon -mattr=-fp-armv8 | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@t = common global i32 0, align 4
+@x = common global i32 0, align 4
+
+define void @foo() {
+entry:
+;CHECK-LABEL: foo:
+;CHECK: __floatsisf
+  %0 = load i32* @x, align 4
+  %conv = sitofp i32 %0 to float
+  store float %conv, float* bitcast (i32* @t to float*), align 4
+  ret void
+}