Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1196,6 +1196,43 @@
   return MadeChange;
 }
 
+/// Match \p V as a bitwise NOT and bind the negated value to \p X.
+/// Accepts (xor X, -1) with the constant on either side, as well as the form
+/// where the -1 is wrapped in a bitcast, which is what constant hoisting
+/// leaves behind once the constant has been materialised elsewhere.
+static bool MatchNotOrHoistedNot(Value *V, Value *&X) {
+  return match(V, m_c_Xor(m_Value(X),
+                          m_CombineOr(m_ConstantInt<-1>(),
+                                      m_BitCast(m_ConstantInt<-1>()))));
+}
+
+/// Strength reduce cmp (xor (a, -1), xor (b, -1)) => cmp (b, a).
+///
+/// Bitwise NOT reverses both the signed and the unsigned ordering of integers
+/// and preserves equality, so swapping the un-negated operands keeps every
+/// integer predicate valid.
+/// This breaks apart the representation of a min/max (cmp + select), but can
+/// reduce register pressure in some cases.
+/// We can't do this in ISel, as the -1 may have been turned into a constant
+/// and hoisted out of the block (hence the bitcast matching).
+///
+/// \p TLI is not consulted by this transform.
+/// \returns true if the operands of \p CI were rewritten.
+static bool StrengthReduceCmpXor(CmpInst *CI, const TargetLowering *TLI) {
+  if (!CI->isIntPredicate())
+    return false;
+
+  Value *A, *B;
+  if (!MatchNotOrHoistedNot(CI->getOperand(0), A) ||
+      !MatchNotOrHoistedNot(CI->getOperand(1), B))
+    return false;
+
+  // cmp pred (~A), (~B)  ==>  cmp pred B, A
+  CI->setOperand(0, B);
+  CI->setOperand(1, A);
+  return true;
+}
+
 static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
   if (SinkCmpExpression(CI, TLI))
     return true;
@@ -1203,6 +1240,9 @@
   if (CombineUAddWithOverflow(CI))
     return true;
 
+  if (StrengthReduceCmpXor(CI, TLI))
+    return true;
+
   return false;
 }
 
Index: test/Transforms/CodeGenPrepare/ARM/xorcmp.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/ARM/xorcmp.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; RUN: opt -S -consthoist -codegenprepare < %s | FileCheck %s --check-prefix=CONSTHOIST
+;
+; Verify that CodeGenPrepare rewrites icmp (xor a, -1), (xor b, -1) into
+; icmp b, a, both when the -1 is an immediate operand and when constant
+; hoisting has replaced it with a bitcast of -1.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.baseline-none-eabi"
+
+; Straight-line case: only the first compare has a NOT on both sides, so only
+; that one gets its operands swapped.
+define void @test(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AN:%.*]] = xor i8 [[A:%.*]], -1
+; CHECK-NEXT:    [[BN:%.*]] = xor i8 [[B:%.*]], -1
+; CHECK-NEXT:    [[CN:%.*]] = xor i8 [[C:%.*]], -1
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp ult i8 [[C]], [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP15]], i8 [[AN]], i8 [[CN]]
+; CHECK-NEXT:    [[CMP16:%.*]] = icmp ult i8 [[COND]], [[BN]]
+; CHECK-NEXT:    [[XK_0_IN:%.*]] = select i1 [[CMP16]], i8 [[COND]], i8 [[BN]]
+; CHECK-NEXT:    [[CONV34:%.*]] = sub i8 [[AN]], [[XK_0_IN]]
+; CHECK-NEXT:    [[CONV38:%.*]] = sub i8 [[BN]], [[XK_0_IN]]
+; CHECK-NEXT:    [[CONV42:%.*]] = sub i8 [[CN]], [[XK_0_IN]]
+; CHECK-NEXT:    call void @use4(i8 [[CONV34]], i8 [[CONV38]], i8 [[CONV42]], i8 [[XK_0_IN]])
+; CHECK-NEXT:    ret void
+;
+; CONSTHOIST-LABEL: @test(
+; CONSTHOIST-NEXT:  entry:
+; CONSTHOIST-NEXT:    [[CONST:%.*]] = bitcast i8 -1 to i8
+; CONSTHOIST-NEXT:    [[AN:%.*]] = xor i8 [[A:%.*]], [[CONST]]
+; CONSTHOIST-NEXT:    [[BN:%.*]] = xor i8 [[B:%.*]], [[CONST]]
+; CONSTHOIST-NEXT:    [[CN:%.*]] = xor i8 [[C:%.*]], [[CONST]]
+; CONSTHOIST-NEXT:    [[CMP15:%.*]] = icmp ult i8 [[C]], [[A]]
+; CONSTHOIST-NEXT:    [[COND:%.*]] = select i1 [[CMP15]], i8 [[AN]], i8 [[CN]]
+; CONSTHOIST-NEXT:    [[CMP16:%.*]] = icmp ult i8 [[COND]], [[BN]]
+; CONSTHOIST-NEXT:    [[XK_0_IN:%.*]] = select i1 [[CMP16]], i8 [[COND]], i8 [[BN]]
+; CONSTHOIST-NEXT:    [[CONV34:%.*]] = sub i8 [[AN]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    [[CONV38:%.*]] = sub i8 [[BN]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    [[CONV42:%.*]] = sub i8 [[CN]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    call void @use4(i8 [[CONV34]], i8 [[CONV38]], i8 [[CONV42]], i8 [[XK_0_IN]])
+; CONSTHOIST-NEXT:    ret void
+;
+entry:
+  %an = xor i8 %a, -1
+  %bn = xor i8 %b, -1
+  %cn = xor i8 %c, -1
+  %cmp15 = icmp ult i8 %an, %cn
+  %cond = select i1 %cmp15, i8 %an, i8 %cn
+  %cmp16 = icmp ult i8 %cond, %bn
+  %xk.0.in = select i1 %cmp16, i8 %cond, i8 %bn
+  %conv34 = sub i8 %an, %xk.0.in
+  %conv38 = sub i8 %bn, %xk.0.in
+  %conv42 = sub i8 %cn, %xk.0.in
+  call void @use4(i8 %conv34, i8 %conv38, i8 %conv42, i8 %xk.0.in)
+  ret void
+}
+
+
+; Loop case: the NOTs sit inside the loop body, so the consthoist run places
+; the bitcast of -1 in the preheader and the compare must look through it.
+define void @testloop(i32 %I, i8* nocapture readonly %A, i8* nocapture %B) {
+; CHECK-LABEL: @testloop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP74:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP74]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_077:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[B_ADDR_076:%.*]] = phi i8* [ [[INCDEC_PTR47:%.*]], [[FOR_BODY]] ], [ [[B:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[A_ADDR_075:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[A_ADDR_075]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i8 [[TMP2]], -1
+; CHECK-NEXT:    [[CMP16:%.*]] = icmp ult i8 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP16]], i8 [[TMP3]], i8 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i8 [[COND]], [[TMP4]]
+; CHECK-NEXT:    [[XK_0_IN:%.*]] = select i1 [[TMP6]], i8 [[COND]], i8 [[TMP4]]
+; CHECK-NEXT:    [[CONV35:%.*]] = sub i8 [[TMP3]], [[XK_0_IN]]
+; CHECK-NEXT:    [[CONV39:%.*]] = sub i8 [[TMP4]], [[XK_0_IN]]
+; CHECK-NEXT:    [[CONV43:%.*]] = sub i8 [[TMP5]], [[XK_0_IN]]
+; CHECK-NEXT:    [[INCDEC_PTR44:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 1
+; CHECK-NEXT:    store i8 [[XK_0_IN]], i8* [[B_ADDR_076]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR45:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 2
+; CHECK-NEXT:    store i8 [[CONV35]], i8* [[INCDEC_PTR44]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR46:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 3
+; CHECK-NEXT:    store i8 [[CONV39]], i8* [[INCDEC_PTR45]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR47]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 4
+; CHECK-NEXT:    store i8 [[CONV43]], i8* [[INCDEC_PTR46]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_077]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[I]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+; CONSTHOIST-LABEL: @testloop(
+; CONSTHOIST-NEXT:  entry:
+; CONSTHOIST-NEXT:    [[CMP74:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CONSTHOIST-NEXT:    br i1 [[CMP74]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CONSTHOIST:       for.body.preheader:
+; CONSTHOIST-NEXT:    [[CONST:%.*]] = bitcast i8 -1 to i8
+; CONSTHOIST-NEXT:    br label [[FOR_BODY:%.*]]
+; CONSTHOIST:       for.body:
+; CONSTHOIST-NEXT:    [[I_077:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CONSTHOIST-NEXT:    [[B_ADDR_076:%.*]] = phi i8* [ [[INCDEC_PTR47:%.*]], [[FOR_BODY]] ], [ [[B:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CONSTHOIST-NEXT:    [[A_ADDR_075:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CONSTHOIST-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 1
+; CONSTHOIST-NEXT:    [[TMP0:%.*]] = load i8, i8* [[A_ADDR_075]], align 1
+; CONSTHOIST-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 2
+; CONSTHOIST-NEXT:    [[TMP1:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
+; CONSTHOIST-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[A_ADDR_075]], i32 3
+; CONSTHOIST-NEXT:    [[TMP2:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1
+; CONSTHOIST-NEXT:    [[TMP3:%.*]] = xor i8 [[TMP0]], [[CONST]]
+; CONSTHOIST-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP1]], [[CONST]]
+; CONSTHOIST-NEXT:    [[TMP5:%.*]] = xor i8 [[TMP2]], [[CONST]]
+; CONSTHOIST-NEXT:    [[CMP16:%.*]] = icmp ult i8 [[TMP2]], [[TMP0]]
+; CONSTHOIST-NEXT:    [[COND:%.*]] = select i1 [[CMP16]], i8 [[TMP3]], i8 [[TMP5]]
+; CONSTHOIST-NEXT:    [[TMP6:%.*]] = icmp ult i8 [[COND]], [[TMP4]]
+; CONSTHOIST-NEXT:    [[XK_0_IN:%.*]] = select i1 [[TMP6]], i8 [[COND]], i8 [[TMP4]]
+; CONSTHOIST-NEXT:    [[CONV35:%.*]] = sub i8 [[TMP3]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    [[CONV39:%.*]] = sub i8 [[TMP4]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    [[CONV43:%.*]] = sub i8 [[TMP5]], [[XK_0_IN]]
+; CONSTHOIST-NEXT:    [[INCDEC_PTR44:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 1
+; CONSTHOIST-NEXT:    store i8 [[XK_0_IN]], i8* [[B_ADDR_076]], align 1
+; CONSTHOIST-NEXT:    [[INCDEC_PTR45:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 2
+; CONSTHOIST-NEXT:    store i8 [[CONV35]], i8* [[INCDEC_PTR44]], align 1
+; CONSTHOIST-NEXT:    [[INCDEC_PTR46:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 3
+; CONSTHOIST-NEXT:    store i8 [[CONV39]], i8* [[INCDEC_PTR45]], align 1
+; CONSTHOIST-NEXT:    [[INCDEC_PTR47]] = getelementptr inbounds i8, i8* [[B_ADDR_076]], i32 4
+; CONSTHOIST-NEXT:    store i8 [[CONV43]], i8* [[INCDEC_PTR46]], align 1
+; CONSTHOIST-NEXT:    [[INC]] = add nuw nsw i32 [[I_077]], 1
+; CONSTHOIST-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[I]]
+; CONSTHOIST-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CONSTHOIST:       for.cond.cleanup:
+; CONSTHOIST-NEXT:    ret void
+;
+entry:
+  %cmp74 = icmp sgt i32 %I, 0
+  br i1 %cmp74, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.077 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %B.addr.076 = phi i8* [ %incdec.ptr47, %for.body ], [ %B, %for.body.preheader ]
+  %A.addr.075 = phi i8* [ %incdec.ptr2, %for.body ], [ %A, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, i8* %A.addr.075, i32 1
+  %0 = load i8, i8* %A.addr.075, align 1
+  %incdec.ptr1 = getelementptr inbounds i8, i8* %A.addr.075, i32 2
+  %1 = load i8, i8* %incdec.ptr, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %A.addr.075, i32 3
+  %2 = load i8, i8* %incdec.ptr1, align 1
+  %3 = xor i8 %0, -1
+  %4 = xor i8 %1, -1
+  %5 = xor i8 %2, -1
+  %cmp16 = icmp ult i8 %3, %5
+  %cond = select i1 %cmp16, i8 %3, i8 %5
+  %6 = icmp ult i8 %cond, %4
+  %xk.0.in = select i1 %6, i8 %cond, i8 %4
+  %conv35 = sub i8 %3, %xk.0.in
+  %conv39 = sub i8 %4, %xk.0.in
+  %conv43 = sub i8 %5, %xk.0.in
+  %incdec.ptr44 = getelementptr inbounds i8, i8* %B.addr.076, i32 1
+  store i8 %xk.0.in, i8* %B.addr.076, align 1
+  %incdec.ptr45 = getelementptr inbounds i8, i8* %B.addr.076, i32 2
+  store i8 %conv35, i8* %incdec.ptr44, align 1
+  %incdec.ptr46 = getelementptr inbounds i8, i8* %B.addr.076, i32 3
+  store i8 %conv39, i8* %incdec.ptr45, align 1
+  %incdec.ptr47 = getelementptr inbounds i8, i8* %B.addr.076, i32 4
+  store i8 %conv43, i8* %incdec.ptr46, align 1
+  %inc = add nuw nsw i32 %i.077, 1
+  %exitcond = icmp eq i32 %inc, %I
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+declare void @use4(i8, i8, i8, i8)