ARM CGP: convert non-source truncs to AND masks after promotion.

Summary of the hunks below:
  * IRPromoter::Cleanup and IRPromoter::ConvertTruncs gain LLVM_DEBUG output.
  * The destination type of every visited trunc that is not a source is
    pushed into TruncTysMap before ExtendSources/PromoteTree run.
  * ConvertTruncs is called after PromoteTree instead of before it, and it
    builds the mask as a constant of the trunc operand's type, using the bit
    width of the cached destination type. The LessThanTypeSize assertion is
    dropped.
  * Two existing tests switch from CHECK-NOT uxt to expecting a uxtb, and a
    new test, replace_trunk_with_mask, is added.

NOTE(review): the template arguments on cast/isa/dyn_cast (TruncInst,
IntegerType, Instruction) and the leading indentation inside the hunks were
restored from context; confirm against the upstream file before applying.

Index: lib/Target/ARM/ARMCodeGenPrepare.cpp
===================================================================
--- lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -686,6 +686,7 @@
 }
 
 void IRPromoter::Cleanup() {
+  LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
   // Some zexts will now have become redundant, along with their trunc
   // operands, so remove them
   for (auto V : *Visited) {
@@ -729,6 +730,7 @@
 }
 
 void IRPromoter::ConvertTruncs() {
+  LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
   IRBuilder<> Builder{Ctx};
 
   for (auto *V : *Visited) {
@@ -736,12 +738,13 @@
       continue;
 
     auto *Trunc = cast<TruncInst>(V);
-    assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
-
     Builder.SetInsertPoint(Trunc);
-    unsigned NumBits =
-      cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
-    ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+    IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+    IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+    unsigned NumBits = DestTy->getScalarSizeInBits();
+    ConstantInt *Mask =
+      ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
     Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
 
     if (auto *I = dyn_cast<Instruction>(Masked))
@@ -783,6 +786,12 @@
         TruncTysMap[I].push_back(I->getOperand(i)->getType());
     }
   }
+  for (auto *V : Visited) {
+    if (!isa<TruncInst>(V) || Sources.count(V))
+      continue;
+    auto *Trunc = cast<TruncInst>(V);
+    TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+  }
 
   // Convert adds and subs using negative immediates to equivalent instructions
   // that use positive constants.
@@ -791,14 +800,14 @@
   // Insert zext instructions between sources and their users.
   ExtendSources();
 
-  // Convert any truncs, that aren't sources, into AND masks.
-  ConvertTruncs();
-
   // Promote visited instructions, mutating their types in place. Also insert
   // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
   // promote.
   PromoteTree();
 
+  // Convert any truncs, that aren't sources, into AND masks.
+  ConvertTruncs();
+
   // Insert trunc instructions for use by calls, stores etc...
   TruncateSinks();
 
Index: test/CodeGen/ARM/CGP/arm-cgp-calls.ll
===================================================================
--- test/CodeGen/ARM/CGP/arm-cgp-calls.ll
+++ test/CodeGen/ARM/CGP/arm-cgp-calls.ll
@@ -201,7 +201,7 @@
 }
 
 ; CHECK-LABEL: promote_arg_pass_to_call
-; CHECK-NOT: uxt
+; CHECK: uxtb
 define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) {
   %conv = add nuw i16 %arg1, 15
   %mul = mul nuw nsw i16 %conv, 3
Index: test/CodeGen/ARM/CGP/arm-cgp-casts.ll
===================================================================
--- test/CodeGen/ARM/CGP/arm-cgp-casts.ll
+++ test/CodeGen/ARM/CGP/arm-cgp-casts.ll
@@ -437,7 +437,7 @@
 }
 
 ; CHECK-COMMON-LABEL: trunc_sink_less_than_ret
-; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: uxt{{.*}}b
 define i8 @trunc_sink_less_than_ret(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i8 zeroext %d, i8 zeroext %e) {
 entry:
   %sub = sub nuw i16 %b, %a
@@ -632,3 +632,25 @@
   %conv5 = zext i8 %or to i32
   ret i32 %conv5
 }
+
+; CHECK-LABEL: replace_trunk_with_mask
+; CHECK: udiv
+; CHECK: mls
+; CHECK: uxtb
+define i32 @replace_trunk_with_mask(i16* %a) {
+entry:
+  %0 = load i16, i16* %a
+  %cmp = icmp eq i16 %0, 0
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:
+  %1 = urem i16 535, %0
+  %.lhs.trunc = trunc i16 %1 to i8
+  %2 = udiv i8 %.lhs.trunc, 3
+  %phitmp = zext i8 %2 to i32
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %phitmp, %cond.false ], [ 0, %entry ]
+  ret i32 %cond
+}