Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -113,7 +113,11 @@ SmallPtrSet Promoted; Module *M = nullptr; LLVMContext &Ctx; + // The type we promote to: always i32 IntegerType *ExtTy = nullptr; + // The type of the value that the search began from, either i8 or i16. + // This defines the max range of the values that we allow in the promoted + // tree. IntegerType *OrigTy = nullptr; SmallPtrSetImpl *Visited; SmallPtrSetImpl *Sources; @@ -326,7 +330,7 @@ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true // // To demonstrate why we can't handle increasing values: - // + // // %add = add i8 %a, 2 // %cmp = icmp ult i8 %add, 127 // @@ -604,7 +608,7 @@ if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I)) continue; - + assert(EnableDSP && "DSP intrinisc insertion not enabled!"); // Replace unsafe instructions with appropriate intrinsic calls. @@ -685,10 +689,10 @@ // Some zexts will now have become redundant, along with their trunc // operands, so remove them for (auto V : *Visited) { - if (!isa(V)) + if (!isa(V)) continue; - auto ZExt = cast(V); + auto ZExt = cast(V); if (ZExt->getDestTy() != ExtTy) continue; @@ -700,9 +704,11 @@ continue; } - // For any truncs that we insert to handle zexts, we can replace the - // result of the zext with the input to the trunc. - if (NewInsts.count(Src) && isa(V) && isa(Src)) { + // Unless they produce a value that is narrower than ExtTy, we can + // replace the result of the zext with the input of a newly inserted + // trunc. + if (NewInsts.count(Src) && isa(Src) && + Src->getType() == OrigTy) { auto *Trunc = cast(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll @@ -588,3 +588,47 @@ store i8 %cond, i8* @a, align 1 ret void } + +; CHECK-LABEL: dont_replace_trunc_1 +; CHECK: cmp +; CHECK: uxtb +define void @dont_replace_trunc_1(i8* %a, i16* %b, i16* %c, i32* %d, i8* %e, i32* %f) { +entry: + %0 = load i16, i16* %c, align 2 + %1 = load i16, i16* %b, align 2 + %conv = sext i16 %1 to i32 + store i32 %conv, i32* %f, align 4 + %2 = trunc i16 %1 to i8 + %conv1 = and i8 %2, 1 + store i8 %conv1, i8* %e, align 1 + %3 = load i8, i8* %a, align 1 + %narrow = mul nuw i8 %3, %conv1 + %mul = zext i8 %narrow to i32 + store i32 %mul, i32* %d, align 4 + %4 = zext i8 %narrow to i16 + %conv5 = or i16 %0, %4 + %tobool = icmp eq i16 %conv5, 0 + br i1 %tobool, label %if.end, label %for.cond + +for.cond: ; preds = %entry, %for.cond + br label %for.cond + +if.end: ; preds = %entry + ret void +} + +; CHECK-LABEL: dont_replace_trunc_2 +; CHECK: cmp +; CHECK: uxtb +define i32 @dont_replace_trunc_2(i16* %a, i8* %b) { +entry: + %0 = load i16, i16* %a, align 2 + %cmp = icmp ugt i16 %0, 8 + %narrow = select i1 %cmp, i16 %0, i16 0 + %cond = trunc i16 %narrow to i8 + %1 = load i8, i8* %b, align 1 + %or = or i8 %1, %cond + store i8 %or, i8* %b, align 1 + %conv5 = zext i8 %or to i32 + ret i32 %conv5 +}