Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7409,22 +7409,28 @@ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading // constant pool values. - // TODO: We can also optimize for vectors here, but we need to make sure - // that the sign mask is created properly for each vector element. if (!TLI.isFAbsFree(VT) && - N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !VT.isVector()) { + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. + SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x7f... 
+ SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + DAG.getConstant(SignMask, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - N->getValueType(0), Int); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } Index: llvm/trunk/test/CodeGen/ARM/fabs-neon.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fabs-neon.ll +++ llvm/trunk/test/CodeGen/ARM/fabs-neon.ll @@ -15,3 +15,40 @@ ret <2 x float> %foo } declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a) + +; No constant pool loads or vector ops are needed for the fabs of a +; bitcasted integer constant; we should just return integer constants +; that have the sign bits turned off. +; +; So instead of something like this: +; mvn r0, #0 +; mov r1, #0 +; vmov d16, r1, r0 +; vabs.f32 d16, d16 +; vmov r0, r1, d16 +; bx lr +; +; We should generate: +; mov r0, #0 +; mvn r1, #-2147483648 +; mov pc, lr + +; CHECK-LABEL: fabs_v2f32_1 +define i64 @fabs_v2f32_1() { + %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) + %ret = bitcast <2 x float> %fabs to i64 + ret i64 %ret +; CHECK: mvn r1, #-2147483648 +; CHECK-NOT: vabs +} + +; CHECK-LABEL: fabs_v2f32_2 +define i64 @fabs_v2f32_2() { + %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) + %ret = bitcast <2 x float> %fabs to i64 + ret i64 %ret +; CHECK: mvn r0, #-2147483648 +; CHECK-NOT: vabs +} Index: llvm/trunk/test/CodeGen/X86/vec_fabs.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_fabs.ll +++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll @@ -38,21 +38,38 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x 
float> %p) ; PR20354: when generating code for a vector fabs op, -; make sure the correct mask is used for all vector elements. -; CHECK-LABEL: .LCPI4_0: -; CHECK-NEXT: .long 2147483647 -; CHECK-NEXT: .long 2147483647 -define i64 @fabs_v2f32(<2 x float> %v) { -; CHECK-LABEL: fabs_v2f32: -; CHECK: movabsq $-9223372034707292160, %[[R:r[^ ]+]] -; CHECK-NEXT: vmovq %[[R]], %[[X:xmm[0-9]+]] -; CHECK-NEXT: vandps {{.*}}.LCPI4_0{{.*}}, %[[X]], %[[X]] -; CHECK-NEXT: vmovq %[[X]], %rax -; CHECK-NEXT: retq - %highbits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000_0000_8000_0000 - %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits) - %ret = bitcast <2 x float> %fabs to i64 - ret i64 %ret +; make sure that we're only turning off the sign bit of each float value. +; No constant pool loads or vector ops are needed for the fabs of a +; bitcasted integer constant; we should just return an integer constant +; that has the sign bits turned off. +; +; So instead of something like this: +; movabsq (materialize the integer constant in a general-purpose register) +; vmovq (move from integer register to vector/fp register) +; vandps (mask off sign bits using a mask loaded from the constant pool) +; vmovq (move vector/fp register back to integer return register) +; +; We should generate: +; mov (put constant value in return register) + +; CHECK-LABEL: fabs_v2f32_1 +define i64 @fabs_v2f32_1() { + %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) + %ret = bitcast <2 x float> %fabs to i64 + ret i64 %ret +; CHECK: movabsq $9223372032559808512, %rax +; # imm = 0x7FFF_FFFF_0000_0000 +} + +; CHECK-LABEL: fabs_v2f32_2 +define i64 @fabs_v2f32_2() { + %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) + %ret = bitcast <2 x float> %fabs to i64 + ret i64 %ret +; CHECK: movl $2147483647, %eax +; # imm = 0x0000_0000_7FFF_FFFF } declare <2 x float> 
@llvm.fabs.v2f32(<2 x float> %p)