Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,6 +42,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -1195,6 +1196,11 @@
 Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
   if (!Inst.getType()->isVectorTy()) return nullptr;
 
+  // It may not be safe to reorder shuffles and things like div, urem, etc.
+  // because we may trap when executing those ops on unknown vector elements.
+  // See PR20059.
+  if (!isSafeToSpeculativelyExecute(&Inst)) return nullptr;
+
   unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
   Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
   assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
Index: test/Transforms/InstCombine/pr20059.ll
===================================================================
--- test/Transforms/InstCombine/pr20059.ll
+++ test/Transforms/InstCombine/pr20059.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
+; for an srem operation. This is not a valid optimization because it may cause a trap
+; on div-by-zero.
+
+; CHECK-LABEL: @do_not_reorder
+; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
+define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) {
+  %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %retval = srem <4 x i32> %splat1, %splat2
+  ret <4 x i32> %retval
+}