diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -141,6 +141,8 @@ SmallPtrSet SafeToPromote; SmallPtrSet SafeWrap; + // Does V already have the same size as RegisterBitWidth. + bool IsNativeWidth(Value *V); // Does V have the same size result type as TypeSize. bool EqualTypeSize(Value *V); // Does V have the same size, or narrower, result type as TypeSize. @@ -193,6 +195,10 @@ Opc == Instruction::SRem || Opc == Instruction::SExt; } +bool TypePromotion::IsNativeWidth(Value *V) { + return V->getType()->getScalarSizeInBits() == RegisterBitWidth; +} + bool TypePromotion::EqualTypeSize(Value *V) { return V->getType()->getScalarSizeInBits() == TypeSize; } @@ -712,14 +718,16 @@ return isSupportedType(I); case Instruction::ZExt: return isSupportedType(I->getOperand(0)); - case Instruction::ICmp: + case Instruction::ICmp: { + Value *Op = I->getOperand(0); // Now that we allow small types than TypeSize, only allow icmp of // TypeSize because they will require a trunc to be legalised. // TODO: Allow icmp of smaller types, and calculate at the end // whether the transform would be beneficial. - if (isa<PointerType>(I->getOperand(0)->getType())) + if (isa<PointerType>(Op->getType())) return true; - return EqualTypeSize(I->getOperand(0)); + return IsNativeWidth(Op) || EqualTypeSize(Op); + } case Instruction::Call: { // Special cases for calls as we need to check for zeroext // TODO We should accept calls even if they don't have zeroext, as they @@ -785,6 +793,12 @@ if (isa<GetElementPtrInst>(V)) return true; + // The search may come across icmps that already use native types, so we + // don't have to search their operands. 
+ if (auto *ICmp = dyn_cast<ICmpInst>(V)) + if (IsNativeWidth(ICmp->getOperand(0))) + return true; + if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n"); return false; diff --git a/llvm/test/Transforms/TypePromotion/ARM/casts.ll b/llvm/test/Transforms/TypePromotion/ARM/casts.ll --- a/llvm/test/Transforms/TypePromotion/ARM/casts.ll +++ b/llvm/test/Transforms/TypePromotion/ARM/casts.ll @@ -155,7 +155,7 @@ ret i1 %or } -; We currently only handle truncs as sinks, so a uxt will still be needed for +; We currently only handle truncs as sources, so a uxt will still be needed for ; the icmp ugt instruction. define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { ; CHECK-LABEL: @urem_trunc_icmps( @@ -409,25 +409,25 @@ ret i8 %retval } -; TODO: We should be able to remove the uxtb here. The transform fails because -; the icmp ugt uses an i32, which is too large... but this doesn't matter -; because it won't be writing a large value to a register as a result. 
define i8 @search_through_zext_2(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { ; CHECK-LABEL: @search_through_zext_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[ADD]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[CONV]], [[C:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[B:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[C:%.*]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i8 [[B]], [[A]] -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[SUB]] to i32 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CONV2]], [[D:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i8 [[A]], i8 [[B]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[SUB]] to i8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[SUB]], [[D:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i32 [[TMP0]], i32 [[TMP1]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] -; CHECK-NEXT: ret i8 [[RETVAL]] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[RETVAL]] to i8 +; CHECK-NEXT: ret i8 [[TMP4]] ; entry: %add = add nuw i8 %a, %b @@ -453,20 +453,23 @@ define i8 @search_through_zext_3(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { ; CHECK-LABEL: @search_through_zext_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[ADD]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[CONV]], [[C:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: 
[[TMP1:%.*]] = zext i8 [[B:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[C:%.*]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i16 [[CONV]] to i8 -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i8 [[B]], [[TRUNC]] -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[SUB]] to i32 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CONV2]], [[D:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i8 [[A]], i8 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[ADD]], 255 +; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SUB]] to i8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[SUB]], [[D:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i32 [[TMP0]], i32 [[TMP1]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] -; CHECK-NEXT: ret i8 [[RETVAL]] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[RETVAL]] to i8 +; CHECK-NEXT: ret i8 [[TMP5]] ; entry: %add = add nuw i8 %a, %b @@ -490,20 +493,23 @@ define i8 @search_through_zext_4(i8 zeroext %a, i8 zeroext %b, i16 zeroext %c, i32 %d) { ; CHECK-LABEL: @search_through_zext_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV_0:%.*]] = zext i8 [[A:%.*]] to i16 -; CHECK-NEXT: [[ADD:%.*]] = add nuw i16 [[CONV_0]], [[C:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ADD]], [[C]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; 
CHECK: if.then: -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i16 [[ADD]] to i8 -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i8 [[B:%.*]], [[TRUNC]] -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[SUB]] to i32 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CONV2]], [[D:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i8 [[A]], i8 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[ADD]], 255 +; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SUB]] to i8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[SUB]], [[D:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i32 [[TMP1]], i32 [[TMP2]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] -; CHECK-NEXT: ret i8 [[RETVAL]] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[IF_THEN]] ] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[RETVAL]] to i8 +; CHECK-NEXT: ret i8 [[TMP5]] ; entry: %conv.0 = zext i8 %a to i16