Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1245,6 +1245,7 @@
 /// specified one but with other operands.
 static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
                                  InstCombiner::BuilderTy *B) {
+  // FIXME: Propagate fast-math-flags from Inst to the newly created operator.
   Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
   if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
     if (isa<OverflowingBinaryOperator>(NewBO)) {
@@ -1270,6 +1271,20 @@
   if (!isSafeToSpeculativelyExecute(&Inst))
     return nullptr;
 
+  // The shuffle transformations below may create floating-point math operations
+  // on values not specified by the program. Those values may include denormals.
+  // Operating on denormals may be extremely expensive or dangerous (PR20358).
+  // If unsafe algebra is not allowed, bail out to avoid that possibility.
+
+  // TODO: There is no direct connection between unsafe algebra and denormal
+  // handling, but it is likely that an unsafe FP environment is also an
+  // environment where denormals are automatically flushed to zero. If support
+  // for detecting/changing the FP environment is added, this check should be
+  // improved to directly query the settings for denormals.
+
+  if (isa<FPMathOperator>(Inst) && !Inst.hasUnsafeAlgebra())
+    return nullptr;
+
   unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
   Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
   assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
Index: test/Transforms/InstCombine/vec_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vec_shuffle.ll
+++ test/Transforms/InstCombine/vec_shuffle.ll
@@ -310,11 +310,30 @@
   ret <4 x i32> %r
 }
 
+; If the FP operation is 'fast', hoist it to eliminate a shuffle.
+
 define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
 ; CHECK-LABEL: @shuffle_17fsub(
-; CHECK-NOT: shufflevector
-; CHECK: fsub <4 x float> %v1, %v2
-; CHECK: shufflevector
+; CHECK-NEXT: fsub <4 x float> %v1, %v2
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret <4 x float>
+  %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
+                      <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
+                      <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %r = fsub fast <4 x float> %t1, %t2
+  ret <4 x float> %r
+}
+
+; If the FP operation is not 'fast', do not risk operating on denormals:
+; https://llvm.org/bugs/show_bug.cgi?id=20358
+
+define <4 x float> @pr20358(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
+; CHECK-LABEL: @pr20358(
+; CHECK-NEXT:  %t1 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:  %t2 = shufflevector <4 x float> %v2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:  %r = fsub <4 x float> %t1, %t2
+; CHECK-NEXT:  ret <4 x float> %r
   %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
                       <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,