Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1273,9 +1273,6 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1601,9 +1598,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -434,9 +434,6 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); - if (isa<Constant>(Op0)) std::swap(Op0, Op1); @@ -1061,9 +1058,6 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1335,9 +1329,6 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); Index: lib/Transforms/InstCombine/InstructionCombining.cpp 
=================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1202,6 +1202,11 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (!Inst.getType()->isVectorTy()) return nullptr; + // It is potentially harmful to operate on unknown FP vector elements. + // Eg, FP ops using denormals can take over 10x longer than normals. + assert(!Inst.getType()->isFPOrFPVectorTy() && + "Attempting to transform Vector FP operation to use unknown elements"); + // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. Index: test/Transforms/InstCombine/pr20059.ll =================================================================== --- test/Transforms/InstCombine/pr20059.ll +++ test/Transforms/InstCombine/pr20059.ll @@ -1,16 +0,0 @@ -; RUN: opt -S -instcombine < %s | FileCheck %s - -; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed -; for an srem operation. This is not a valid optimization because it may cause a trap -; on div-by-zero. 
- -; CHECK-LABEL: @do_not_reorder -; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2 -define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) { - %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer - %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer - %retval = srem <4 x i32> %splat1, %splat2 - ret <4 x i32> %retval -} Index: test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- test/Transforms/InstCombine/vec_shuffle.ll +++ test/Transforms/InstCombine/vec_shuffle.ll @@ -310,19 +310,6 @@ ret <4 x i32> %r } -define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable { -; CHECK-LABEL: @shuffle_17fsub( -; CHECK-NOT: shufflevector -; CHECK: fsub <4 x float> %v1, %v2 -; CHECK: shufflevector - %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, - <4 x i32> - %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, - <4 x i32> - %r = fsub <4 x float> %t1, %t2 - ret <4 x float> %r -} - define <4 x i32> @shuffle_17addconst(<4 x i32> %v1, <4 x i32> %v2) { ; CHECK-LABEL: @shuffle_17addconst( ; CHECK-NOT: shufflevector @@ -414,3 +401,33 @@ %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i ret <4 x i32> %masked_new.i.i.i } + +; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed +; for an srem operation. This is not a valid optimization because it may cause a trap +; on div-by-zero. 
+ +define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) { +; CHECK-LABEL: @pr20059 +; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2 + %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer + %retval = srem <4 x i32> %splat1, %splat2 + ret <4 x i32> %retval +} + +; In PR20358 ( http://llvm.org/pr20358 ), shufflevector operations are reordered/removed +; for an FP mul operation. This may not be a profitable optimization because it may +; cause operations on denormals. + +define <4 x float> @pr20358(<4 x float> %p1, <4 x float> %p2) { +; CHECK-LABEL: @pr20358 +; CHECK: %splat1 = shufflevector <4 x float> %p1, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %splat2 = shufflevector <4 x float> %p2, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %retval = fmul <4 x float> %splat1, %splat2 + %splat1 = shufflevector <4 x float> %p1, <4 x float> undef, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x float> %p2, <4 x float> undef, <4 x i32> zeroinitializer + %retval = fmul <4 x float> %splat1, %splat2 + ret <4 x float> %retval +}