Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1140,9 +1140,48 @@
   return true;
 }
 
+/// These are the ingredients in an alternate form binary operator as described
+/// below.
+struct BinopElts {
+  BinaryOperator::BinaryOps Opcode;
+  Value *Op0;
+  Value *Op1;
+};
+
+/// Binops may be transformed into binops with different opcodes and operands.
+/// Reverse the usual canonicalization to enable folds with the non-canonical
+/// form of the binop. If a transform is possible, return the elements of the
+/// new binop. If not, return invalid elements.
+static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
+  switch (BO->getOpcode()) {
+  case Instruction::Shl: {
+    // shl X, C --> mul X, 1 << C
+    Constant *C;
+    if (match(BO->getOperand(1), m_Constant(C))) {
+      C = ConstantExpr::getShl(ConstantInt::get(BO->getType(), 1), C);
+      return { Instruction::Mul, BO->getOperand(0), C };
+    }
+    break;
+  }
+  case Instruction::Or: {
+    // or X, C --> add X, C (when X and C have no common bits set)
+    const APInt *C;
+    if (match(BO->getOperand(1), m_APInt(C)) &&
+        MaskedValueIsZero(BO->getOperand(0), *C, DL)) {
+      return { Instruction::Add, BO->getOperand(0), BO->getOperand(1) };
+    }
+    break;
+  }
+  default:
+    break;
+  }
+  return { (BinaryOperator::BinaryOps)0, nullptr, nullptr };
+}
+
+/// Try to fold shuffles that are the equivalent of a vector select.
 static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
-                                      InstCombiner::BuilderTy &Builder) {
-  // Folds under here require the equivalent of a vector select.
+                                      InstCombiner::BuilderTy &Builder,
+                                      const DataLayout &DL) {
   if (!Shuf.isSelect())
     return nullptr;
 
@@ -1168,19 +1207,22 @@
   BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
   bool DropNSW = false;
   if (ConstantsAreOp1 && Opc0 != Opc1) {
-    // If we have multiply and shift-left-by-constant, convert the shift:
-    // shl X, C --> mul X, 1 << C
     // TODO: We drop "nsw" if shift is converted into multiply because it may
    // not be correct when the shift amount is BitWidth - 1. We could examine
    // each vector element to determine if it is safe to keep that flag.
-    if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) {
-      C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1);
-      Opc1 = Instruction::Mul;
-      DropNSW = true;
-    } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) {
-      C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0);
-      Opc0 = Instruction::Mul;
+    if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
       DropNSW = true;
+    BinopElts AltB0 = getAlternateBinop(B0, DL);
+    if (AltB0.Opcode && isa<Constant>(AltB0.Op1)) {
+      Opc0 = AltB0.Opcode;
+      C0 = cast<Constant>(AltB0.Op1);
+    } else {
+      // Try again with B1.
+      BinopElts AltB1 = getAlternateBinop(B1, DL);
+      if (AltB1.Opcode && isa<Constant>(AltB1.Op1)) {
+        Opc1 = AltB1.Opcode;
+        C1 = cast<Constant>(AltB1.Op1);
+      }
     }
   }
 
@@ -1249,7 +1291,7 @@
           LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
     return replaceInstUsesWith(SVI, V);
 
-  if (Instruction *I = foldSelectShuffle(SVI, Builder))
+  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
     return I;
 
   bool MadeChange = false;
Index: test/Transforms/InstCombine/shuffle_select.ll
===================================================================
--- test/Transforms/InstCombine/shuffle_select.ll
+++ test/Transforms/InstCombine/shuffle_select.ll
@@ -582,9 +582,7 @@
 define <4 x i32> @add_or(<4 x i32> %v) {
 ; CHECK-LABEL: @add_or(
 ; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[V0]],
-; CHECK-NEXT:    [[T2:%.*]] = or <4 x i32> [[V0]],
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
+; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]],
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %v0 = shl <4 x i32> %v, ; clear the bottom bits
@@ -599,9 +597,7 @@
 define <4 x i8> @or_add(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add(
 ; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
-; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]],
-; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]],
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32>
+; CHECK-NEXT:    [[T3:%.*]] = add nsw <4 x i8> [[V0]],
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear the top bits
@@ -611,6 +607,8 @@
   ret <4 x i8> %t3
 }
 
+; Negative test: not all 'or' insts can be converted to 'add'.
+
 define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add_not_enough_masking(
 ; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
@@ -631,9 +629,8 @@
 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 ; CHECK-LABEL: @add_or_2_vars(
 ; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[V1:%.*]],
-; CHECK-NEXT:    [[T2:%.*]] = or <4 x i32> [[V0]],
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
+; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]],
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %v0 = shl <4 x i32> %v, ; clear the bottom bits
@@ -646,9 +643,8 @@
 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
 ; CHECK-LABEL: @or_add_2_vars(
 ; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
-; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]],
-; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V1:%.*]],
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V0]], <4 x i8> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT:    [[T3:%.*]] = add <4 x i8> [[TMP1]],
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear the top bits
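
Note (not part of the patch): a minimal before/after sketch of the new or->add conversion, using a hypothetical function name (@add_or_sketch) and illustrative constants that are not taken from the test file above. Because 'shl %v, 5' leaves the low 5 bits of %v0 known zero, 'or %v0, 3' has no bits in common with %v0, so getAlternateBinop can rewrite it as 'add %v0, 3'; both sides of the select-shuffle then share the 'add' opcode and the operand %v0, and foldSelectShuffle can merge them into one 'add' with a lane-blended constant.

define <4 x i32> @add_or_sketch(<4 x i32> %v) {
  ; the shift guarantees the low 5 bits of %v0 are zero
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
  %t1 = add <4 x i32> %v0, <i32 65536, i32 65536, i32 65536, i32 65536>
  %t2 = or <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3>
  ; select-shuffle: lanes 0 and 3 come from %t1, lanes 1 and 2 from %t2
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i32> %t3
}

With this patch applied, 'opt -instcombine -S' on the sketch above is expected to reduce the body to roughly:

  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
  %t3 = add <4 x i32> %v0, <i32 65536, i32 3, i32 3, i32 65536>
  ret <4 x i32> %t3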