Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1245,6 +1245,7 @@ /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, InstCombiner::BuilderTy *B) { + // FIXME: Propagate fast-math-flags. Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) { if (isa<OverflowingBinaryOperator>(NewBO)) { @@ -1270,6 +1271,20 @@ if (!isSafeToSpeculativelyExecute(&Inst)) return nullptr; + // The shuffle transformations below may create floating-point math operations + // on values not specified by the program. Those values may include denormals. + // Operating on denormals may be extremely expensive or dangerous (PR20358). + // If unsafe algebra is not allowed, bail out to avoid that possibility. + + // TODO: There is no direct connection between unsafe algebra and denormal + // handling, but it is likely that an unsafe FP environment is also an + // environment where denormals are automatically flushed to zero. If support + // for detecting/changing the FP environment is added, this check should be + // improved to directly query the settings for denormals. + + if (isa<FPMathOperator>(Inst) && !Inst.hasUnsafeAlgebra()) + return nullptr; + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); Index: test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- test/Transforms/InstCombine/vec_shuffle.ll +++ test/Transforms/InstCombine/vec_shuffle.ll @@ -310,11 +310,30 @@ ret <4 x i32> %r } +; If the FP operation is 'fast', hoist it to eliminate a shuffle. 
+ define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable { ; CHECK-LABEL: @shuffle_17fsub( -; CHECK-NOT: shufflevector -; CHECK: fsub <4 x float> %v1, %v2 -; CHECK: shufflevector +; CHECK-NEXT: fsub <4 x float> %v1, %v2 +; CHECK-NEXT: shufflevector +; CHECK-NEXT: ret <4 x float> + %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, + <4 x i32> + %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, + <4 x i32> + %r = fsub fast <4 x float> %t1, %t2 + ret <4 x float> %r +} + +; If the FP operation is not 'fast', do not risk operating on denormals: +; https://llvm.org/bugs/show_bug.cgi?id=20358 + +define <4 x float> @pr20358(<4 x float> %v1, <4 x float> %v2) nounwind uwtable { +; CHECK-LABEL: @pr20358( +; CHECK-NEXT: %t1 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> +; CHECK-NEXT: %t2 = shufflevector <4 x float> %v2, <4 x float> undef, <4 x i32> +; CHECK-NEXT: %r = fsub <4 x float> %t1, %t2 +; CHECK-NEXT: ret <4 x float> %r %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, <4 x i32> %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,