Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -566,6 +566,16 @@
     return false;
   }
 
+  /// These two forms are equivalent:
+  ///   sub %y, (xor %x, -1)
+  ///   add (add %x, 1), %y
+  /// The variant with two add's is IR-canonical.
+  /// Some targets may prefer one to the other.
+  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
+    // By default, let's assume that everyone prefers the form with two add's.
+    return true;
+  }
+
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
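The equivalence the new hook describes is plain two's-complement arithmetic:
not(x) = -x - 1, so y - not(x) = y + x + 1 = (x + 1) + y. A minimal standalone
sanity check of that identity (illustrative only, not part of the patch),
exhaustive over all 8-bit inputs:

  #include <cassert>
  #include <cstdint>

  int main() {
    // y - ~x must equal (x + 1) + y modulo 2^8.
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y)
        assert(uint8_t(y - uint8_t(~x)) == uint8_t((x + 1) + y));
    return 0;
  }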
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2424,6 +2424,17 @@
     if (Xor)
       return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
   }
+
+  // Look for:
+  //   add (add x, y), 1
+  // And if the target does not like this form then turn into:
+  //   sub y, (xor x, -1)
+  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+      N0.getOpcode() == ISD::ADD) {
+    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+  }
 }
 
 // (x - y) + -1  ->  add (xor y, -1), x
@@ -2584,6 +2595,17 @@
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
+  // Look for:
+  //   add (add x, 1), y
+  // And if the target does not like this form then turn into:
+  //   sub y, (xor x, -1)
+  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+  }
+
   // Hoist one-use subtraction by non-opaque constant:
   //   (x - C) + y  ->  (x + y) - C
   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
@@ -3108,6 +3130,15 @@
     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
   }
 
+  // Look for:
+  //   sub y, (xor x, -1)
+  // And if the target does not like this form then turn into:
+  //   add (add x, y), 1
+  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+  }
+
   // Hoist one-use addition by non-opaque constant:
   //   (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
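Note the polarity of the guards above: the two rewrites on the add side fire
only when preferIncOfAddToSubOfNot() is false, and the rewrite on the sub side
fires only when it is true, so DAGCombine cannot ping-pong between the two
forms. A toy model of that invariant (illustrative only, not LLVM code):

  enum class Form { IncOfAdd, SubOfNot };

  constexpr Form combine(Form F, bool PreferIncOfAdd) {
    if (F == Form::IncOfAdd && !PreferIncOfAdd)
      return Form::SubOfNot; // mirrors the two guarded add rewrites
    if (F == Form::SubOfNot && PreferIncOfAdd)
      return Form::IncOfAdd; // mirrors the guarded sub rewrite
    return F; // already in the preferred form; nothing fires
  }

  // Applying the combine a second time never changes the result.
  static_assert(combine(combine(Form::IncOfAdd, false), false) ==
                combine(Form::IncOfAdd, false), "no ping-pong");
  static_assert(combine(combine(Form::SubOfNot, true), true) ==
                combine(Form::SubOfNot, true), "no ping-pong");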
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -497,6 +497,8 @@
     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
   }
 
+  bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   bool hasBitPreservingFPLogic(EVT VT) const override {
     // FIXME: Is this always true? It should be true for vectors at least.
     return VT == MVT::f32 || VT == MVT::f64;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12039,6 +12039,11 @@
   return OptSize && !VT.isVector();
 }
 
+bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  // We want inc-of-add for scalars and sub-of-not for vectors.
+  return VT.isScalarInteger();
+}
+
 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
 }
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -608,6 +608,9 @@
     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                            CombineLevel Level) const override;
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -10736,6 +10736,15 @@
   return false;
 }
 
+bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  if (!Subtarget->hasNEON()) {
+    if (Subtarget->isThumb1Only())
+      return VT.getScalarSizeInBits() <= 32;
+    return true;
+  }
+  return VT.isScalarInteger();
+}
+
 static SDValue PerformSHLSimplify(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const ARMSubtarget *ST) {
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -634,6 +634,8 @@
       return true;
     }
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1279,6 +1279,10 @@
   return Subtarget.hasSPE();
 }
 
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  return VT.isScalarInteger();
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER: break;
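The overrides above all follow the same pattern: scalar integers keep the
IR-canonical inc-of-add form, while vectors take sub-of-not, because NOT plus
SUB avoids materializing a splat-of-1 (the movi/vmov.iN/vspltisN instructions
and .LCPI constant-pool loads removed from the tests below). ARM additionally
keeps inc-of-add on Thumb1 only up to 32 bits: for wider types the borrow
chain of the sub-of-not form avoids materializing a zero register for the
carry, as the THUMB6 scalar_i64 delta below shows. A target outside this
patch would opt in the same way; sketch (hypothetical MyTargetLowering, not
part of the patch):

  bool MyTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
    // Keep the canonical form for scalars; vectors profit from NOT + SUB.
    return VT.isScalarInteger();
  }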
Index: test/CodeGen/AArch64/inc-of-add.ll
===================================================================
--- test/CodeGen/AArch64/inc-of-add.ll
+++ test/CodeGen/AArch64/inc-of-add.ll
@@ -53,9 +52,8 @@
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: movi v1.16b, #1
-; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: sub v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: ret
   %t0 = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %t1 = add <16 x i8> %y, %t0
@@ -65,9 +64,8 @@
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: ret
   %t0 = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %t1 = add <8 x i16> %y, %t0
@@ -77,9 +75,8 @@
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: ret
   %t0 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %t1 = add <4 x i32> %y, %t0
@@ -89,10 +86,8 @@
 define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
 ; CHECK-NEXT: ret
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0
Index: test/CodeGen/AArch64/sub-of-not.ll
===================================================================
--- test/CodeGen/AArch64/sub-of-not.ll
+++ test/CodeGen/AArch64/sub-of-not.ll
@@ -9,8 +9,8 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: sub w0, w1, w8
+; CHECK-NEXT: add w8, w1, w0
+; CHECK-NEXT: add w0, w8, #1 // =1
 ; CHECK-NEXT: ret
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
@@ -20,8 +20,8 @@
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: sub w0, w1, w8
+; CHECK-NEXT: add w8, w1, w0
+; CHECK-NEXT: add w0, w8, #1 // =1
 ; CHECK-NEXT: ret
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
@@ -31,8 +31,8 @@
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: sub w0, w1, w8
+; CHECK-NEXT: add w8, w1, w0
+; CHECK-NEXT: add w0, w8, #1 // =1
 ; CHECK-NEXT: ret
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
@@ -42,8 +42,8 @@
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: scalar_i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x0
-; CHECK-NEXT: sub x0, x1, x8
+; CHECK-NEXT: add x8, x1, x0
+; CHECK-NEXT: add x0, x8, #1 // =1
 ; CHECK-NEXT: ret
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
Index: test/CodeGen/ARM/inc-of-add.ll
===================================================================
--- test/CodeGen/ARM/inc-of-add.ll
+++ test/CodeGen/ARM/inc-of-add.ll
@@ -91,11 +91,11 @@
 ;
 ; THUMB6-LABEL: scalar_i64:
 ; THUMB6: @ %bb.0:
-; THUMB6-NEXT: adds r0, r0, r2
-; THUMB6-NEXT: adcs r1, r3
-; THUMB6-NEXT: movs r2, #0
-; THUMB6-NEXT: adds r0, r0, #1
-; THUMB6-NEXT: adcs r1, r2
+; THUMB6-NEXT: mvns r1, r1
+; THUMB6-NEXT: mvns r0, r0
+; THUMB6-NEXT: subs r0, r2, r0
+; THUMB6-NEXT: sbcs r3, r1
+; THUMB6-NEXT: mov r1, r3
 ; THUMB6-NEXT: bx lr
 ;
 ; THUMB78-LABEL: scalar_i64:
@@ -196,12 +196,11 @@
 ;
 ; ARM78-LABEL: vector_i128_i8:
 ; ARM78: @ %bb.0:
 ; ARM78-NEXT: vmov d17, r2, r3
-; ARM78-NEXT: mov r12, sp
 ; ARM78-NEXT: vmov d16, r0, r1
-; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT: vmov.i8 q10, #0x1
-; ARM78-NEXT: vadd.i8 q8, q8, q9
-; ARM78-NEXT: vadd.i8 q8, q8, q10
+; ARM78-NEXT: mov r0, sp
+; ARM78-NEXT: vmvn q8, q8
+; ARM78-NEXT: vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT: vsub.i8 q8, q9, q8
 ; ARM78-NEXT: vmov r0, r1, d16
 ; ARM78-NEXT: vmov r2, r3, d17
 ; ARM78-NEXT: bx lr
@@ -292,12 +291,11 @@
 ;
 ; THUMB78-LABEL: vector_i128_i8:
 ; THUMB78: @ %bb.0:
 ; THUMB78-NEXT: vmov d17, r2, r3
-; THUMB78-NEXT: mov r12, sp
 ; THUMB78-NEXT: vmov d16, r0, r1
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT: vmov.i8 q10, #0x1
-; THUMB78-NEXT: vadd.i8 q8, q8, q9
-; THUMB78-NEXT: vadd.i8 q8, q8, q10
+; THUMB78-NEXT: mov r0, sp
+; THUMB78-NEXT: vmvn q8, q8
+; THUMB78-NEXT: vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT: vsub.i8 q8, q9, q8
 ; THUMB78-NEXT: vmov r0, r1, d16
 ; THUMB78-NEXT: vmov r2, r3, d17
 ; THUMB78-NEXT: bx lr
@@ -352,12 +350,11 @@
 ;
 ; ARM78-LABEL: vector_i128_i16:
 ; ARM78: @ %bb.0:
 ; ARM78-NEXT: vmov d17, r2, r3
-; ARM78-NEXT: mov r12, sp
 ; ARM78-NEXT: vmov d16, r0, r1
-; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT: vmov.i16 q10, #0x1
-; ARM78-NEXT: vadd.i16 q8, q8, q9
-; ARM78-NEXT: vadd.i16 q8, q8, q10
+; ARM78-NEXT: mov r0, sp
+; ARM78-NEXT: vmvn q8, q8
+; ARM78-NEXT: vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT: vsub.i16 q8, q9, q8
 ; ARM78-NEXT: vmov r0, r1, d16
 ; ARM78-NEXT: vmov r2, r3, d17
 ; ARM78-NEXT: bx lr
@@ -408,12 +405,11 @@
 ;
 ; THUMB78-LABEL: vector_i128_i16:
 ; THUMB78: @ %bb.0:
 ; THUMB78-NEXT: vmov d17, r2, r3
-; THUMB78-NEXT: mov r12, sp
 ; THUMB78-NEXT: vmov d16, r0, r1
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT: vmov.i16 q10, #0x1
-; THUMB78-NEXT: vadd.i16 q8, q8, q9
-; THUMB78-NEXT: vadd.i16 q8, q8, q10
+; THUMB78-NEXT: mov r0, sp
+; THUMB78-NEXT: vmvn q8, q8
+; THUMB78-NEXT: vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT: vsub.i16 q8, q9, q8
 ; THUMB78-NEXT: vmov r0, r1, d16
 ; THUMB78-NEXT: vmov r2, r3, d17
 ; THUMB78-NEXT: bx lr
@@ -442,12 +438,11 @@
 ;
 ; ARM78-LABEL: vector_i128_i32:
 ; ARM78: @ %bb.0:
 ; ARM78-NEXT: vmov d17, r2, r3
-; ARM78-NEXT: mov r12, sp
 ; ARM78-NEXT: vmov d16, r0, r1
-; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT: vmov.i32 q10, #0x1
-; ARM78-NEXT: vadd.i32 q8, q8, q9
-; ARM78-NEXT: vadd.i32 q8, q8, q10
+; ARM78-NEXT: mov r0, sp
+; ARM78-NEXT: vmvn q8, q8
+; ARM78-NEXT: vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT: vsub.i32 q8, q9, q8
 ; ARM78-NEXT: vmov r0, r1, d16
 ; ARM78-NEXT: vmov r2, r3, d17
 ; ARM78-NEXT: bx lr
@@ -472,12 +467,11 @@
 ;
 ; THUMB78-LABEL: vector_i128_i32:
 ; THUMB78: @ %bb.0:
 ; THUMB78-NEXT: vmov d17, r2, r3
-; THUMB78-NEXT: mov r12, sp
 ; THUMB78-NEXT: vmov d16, r0, r1
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT: vmov.i32 q10, #0x1
-; THUMB78-NEXT: vadd.i32 q8, q8, q9
-; THUMB78-NEXT: vadd.i32 q8, q8, q10
+; THUMB78-NEXT: mov r0, sp
+; THUMB78-NEXT: vmvn q8, q8
+; THUMB78-NEXT: vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT: vsub.i32 q8, q9, q8
 ; THUMB78-NEXT: vmov r0, r1, d16
 ; THUMB78-NEXT: vmov r2, r3, d17
 ; THUMB78-NEXT: bx lr
@@ -509,60 +503,41 @@
 ; ARM78-NEXT: vmov d17, r2, r3
 ; ARM78-NEXT: vmov d16, r0, r1
 ; ARM78-NEXT: mov r0, sp
+; ARM78-NEXT: vmvn q8, q8
 ; ARM78-NEXT: vld1.64 {d18, d19}, [r0]
-; ARM78-NEXT: adr r0, .LCPI7_0
-; ARM78-NEXT: vadd.i64 q8, q8, q9
-; ARM78-NEXT: vld1.64 {d18, d19}, [r0:128]
-; ARM78-NEXT: vadd.i64 q8, q8, q9
+; ARM78-NEXT: vsub.i64 q8, q9, q8
 ; ARM78-NEXT: vmov r0, r1, d16
 ; ARM78-NEXT: vmov r2, r3, d17
 ; ARM78-NEXT: bx lr
-; ARM78-NEXT: .p2align 4
-; ARM78-NEXT: @ %bb.1:
-; ARM78-NEXT: .LCPI7_0:
-; ARM78-NEXT: .long 1 @ 0x1
-; ARM78-NEXT: .long 0 @ 0x0
-; ARM78-NEXT: .long 1 @ 0x1
-; ARM78-NEXT: .long 0 @ 0x0
 ;
 ; THUMB6-LABEL: vector_i128_i64:
 ; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r6, lr}
-; THUMB6-NEXT: ldr r4, [sp, #20]
+; THUMB6-NEXT: push {r4, r5, r7, lr}
+; THUMB6-NEXT: mvns r4, r1
+; THUMB6-NEXT: mvns r0, r0
+; THUMB6-NEXT: ldr r1, [sp, #20]
 ; THUMB6-NEXT: ldr r5, [sp, #16]
-; THUMB6-NEXT: adds r0, r0, r5
-; THUMB6-NEXT: adcs r1, r4
-; THUMB6-NEXT: movs r4, #0
-; THUMB6-NEXT: adds r0, r0, #1
-; THUMB6-NEXT: adcs r1, r4
-; THUMB6-NEXT: ldr r5, [sp, #28]
-; THUMB6-NEXT: ldr r6, [sp, #24]
-; THUMB6-NEXT: adds r2, r2, r6
-; THUMB6-NEXT: adcs r3, r5
-; THUMB6-NEXT: adds r2, r2, #1
-; THUMB6-NEXT: adcs r3, r4
-; THUMB6-NEXT: pop {r4, r5, r6, pc}
+; THUMB6-NEXT: subs r0, r5, r0
+; THUMB6-NEXT: sbcs r1, r4
+; THUMB6-NEXT: mvns r4, r3
+; THUMB6-NEXT: mvns r2, r2
+; THUMB6-NEXT: ldr r3, [sp, #28]
+; THUMB6-NEXT: ldr r5, [sp, #24]
+; THUMB6-NEXT: subs r2, r5, r2
+; THUMB6-NEXT: sbcs r3, r4
+; THUMB6-NEXT: pop {r4, r5, r7, pc}
 ;
 ; THUMB78-LABEL: vector_i128_i64:
 ; THUMB78: @ %bb.0:
 ; THUMB78-NEXT: vmov d17, r2, r3
 ; THUMB78-NEXT: vmov d16, r0, r1
 ; THUMB78-NEXT: mov r0, sp
+; THUMB78-NEXT: vmvn q8, q8
 ; THUMB78-NEXT: vld1.64 {d18, d19}, [r0]
-; THUMB78-NEXT: adr r0, .LCPI7_0
-; THUMB78-NEXT: vadd.i64 q8, q8, q9
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r0:128]
-; THUMB78-NEXT: vadd.i64 q8, q8, q9
+; THUMB78-NEXT: vsub.i64 q8, q9, q8
 ; THUMB78-NEXT: vmov r0, r1, d16
 ; THUMB78-NEXT: vmov r2, r3, d17
 ; THUMB78-NEXT: bx lr
-; THUMB78-NEXT: .p2align 4
-; THUMB78-NEXT: @ %bb.1:
-; THUMB78-NEXT: .LCPI7_0:
-; THUMB78-NEXT: .long 1 @ 0x1
-; THUMB78-NEXT: .long 0 @ 0x0
-; THUMB78-NEXT: .long 1 @ 0x1
-; THUMB78-NEXT: .long 0 @ 0x0
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0
   ret <2 x i64> %t1
Index: test/CodeGen/ARM/sub-of-not.ll
===================================================================
--- test/CodeGen/ARM/sub-of-not.ll
+++ test/CodeGen/ARM/sub-of-not.ll
@@ -14,15 +14,21 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; ARM-LABEL: scalar_i8:
 ; ARM: @ %bb.0:
-; ARM-NEXT: mvn r0, r0
-; ARM-NEXT: sub r0, r1, r0
+; ARM-NEXT: add r0, r1, r0
+; ARM-NEXT: add r0, r0, #1
 ; ARM-NEXT: bx lr
 ;
-; THUMB-LABEL: scalar_i8:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: mvns r0, r0
-; THUMB-NEXT: subs r0, r1, r0
-; THUMB-NEXT: bx lr
+; THUMB6-LABEL: scalar_i8:
+; THUMB6: @ %bb.0:
+; THUMB6-NEXT: adds r0, r1, r0
+; THUMB6-NEXT: adds r0, r0, #1
+; THUMB6-NEXT: bx lr
+;
+; THUMB78-LABEL: scalar_i8:
+; THUMB78: @ %bb.0:
+; THUMB78-NEXT: add r0, r1
+; THUMB78-NEXT: adds r0, #1
+; THUMB78-NEXT: bx lr
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
   ret i8 %t1
@@ -31,15 +37,21 @@
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; ARM-LABEL: scalar_i16:
 ; ARM: @ %bb.0:
-; ARM-NEXT: mvn r0, r0
-; ARM-NEXT: sub r0, r1, r0
+; ARM-NEXT: add r0, r1, r0
+; ARM-NEXT: add r0, r0, #1
 ; ARM-NEXT: bx lr
 ;
-; THUMB-LABEL: scalar_i16:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: mvns r0, r0
-; THUMB-NEXT: subs r0, r1, r0
-; THUMB-NEXT: bx lr
+; THUMB6-LABEL: scalar_i16:
+; THUMB6: @ %bb.0:
+; THUMB6-NEXT: adds r0, r1, r0
+; THUMB6-NEXT: adds r0, r0, #1
+; THUMB6-NEXT: bx lr
+;
+; THUMB78-LABEL: scalar_i16:
+; THUMB78: @ %bb.0:
+; THUMB78-NEXT: add r0, r1
+; THUMB78-NEXT: adds r0, #1
+; THUMB78-NEXT: bx lr
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
   ret i16 %t1
@@ -48,15 +60,21 @@
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; ARM-LABEL: scalar_i32:
 ; ARM: @ %bb.0:
-; ARM-NEXT: mvn r0, r0
-; ARM-NEXT: sub r0, r1, r0
+; ARM-NEXT: add r0, r1, r0
+; ARM-NEXT: add r0, r0, #1
 ; ARM-NEXT: bx lr
 ;
-; THUMB-LABEL: scalar_i32:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: mvns r0, r0
-; THUMB-NEXT: subs r0, r1, r0
-; THUMB-NEXT: bx lr
+; THUMB6-LABEL: scalar_i32:
+; THUMB6: @ %bb.0:
+; THUMB6-NEXT: adds r0, r1, r0
+; THUMB6-NEXT: adds r0, r0, #1
+; THUMB6-NEXT: bx lr
+;
+; THUMB78-LABEL: scalar_i32:
+; THUMB78: @ %bb.0:
+; THUMB78-NEXT: add r0, r1
+; THUMB78-NEXT: adds r0, #1
+; THUMB78-NEXT: bx lr
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
   ret i32 %t1
@@ -65,10 +83,10 @@
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; ARM-LABEL: scalar_i64:
 ; ARM: @ %bb.0:
-; ARM-NEXT: mvn r0, r0
-; ARM-NEXT: mvn r1, r1
-; ARM-NEXT: subs r0, r2, r0
-; ARM-NEXT: sbc r1, r3, r1
+; ARM-NEXT: adds r0, r2, r0
+; ARM-NEXT: adc r1, r3, r1
+; ARM-NEXT: adds r0, r0, #1
+; ARM-NEXT: adc r1, r1, #0
 ; ARM-NEXT: bx lr
 ;
 ; THUMB6-LABEL: scalar_i64:
@@ -80,21 +98,13 @@
 ; THUMB6-NEXT: mov r1, r3
 ; THUMB6-NEXT: bx lr
 ;
-; THUMB7-LABEL: scalar_i64:
-; THUMB7: @ %bb.0:
-; THUMB7-NEXT: mvns r0, r0
-; THUMB7-NEXT: mvns r1, r1
-; THUMB7-NEXT: subs r0, r2, r0
-; THUMB7-NEXT: sbc.w r1, r3, r1
-; THUMB7-NEXT: bx lr
-;
-; THUMB8-LABEL: scalar_i64:
-; THUMB8: @ %bb.0:
-; THUMB8-NEXT: mvns r1, r1
-; THUMB8-NEXT: mvns r0, r0
-; THUMB8-NEXT: subs r0, r2, r0
-; THUMB8-NEXT: sbc.w r1, r3, r1
-; THUMB8-NEXT: bx lr
+; THUMB78-LABEL: scalar_i64:
+; THUMB78: @ %bb.0:
+; THUMB78-NEXT: adds r0, r0, r2
+; THUMB78-NEXT: adcs r1, r3
+; THUMB78-NEXT: adds r0, #1
+; THUMB78-NEXT: adc r1, r1, #0
+; THUMB78-NEXT: bx lr
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
   ret i64 %t1
@@ -103,83 +113,83 @@
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i8:
 ; ARM6: @ %bb.0:
-; ARM6-NEXT: ldrb r1, [sp, #52]
-; ARM6-NEXT: mvn r12, r1
+; ARM6-NEXT: ldrb r12, [sp, #52]
 ; ARM6-NEXT: ldrb r1, [sp, #116]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #48]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #15]
-; ARM6-NEXT: ldrb r1, [sp, #48]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #112]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #44]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #14]
-; ARM6-NEXT: ldrb r1, [sp, #44]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #108]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #40]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #13]
-; ARM6-NEXT: ldrb r1, [sp, #40]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #104]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #36]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #12]
-; ARM6-NEXT: ldrb r1, [sp, #36]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #100]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #32]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #11]
-; ARM6-NEXT: ldrb r1, [sp, #32]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #96]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #28]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #10]
-; ARM6-NEXT: ldrb r1, [sp, #28]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #92]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #24]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #9]
-; ARM6-NEXT: ldrb r1, [sp, #24]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #88]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #20]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #8]
-; ARM6-NEXT: ldrb r1, [sp, #20]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #84]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #16]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #7]
-; ARM6-NEXT: ldrb r1, [sp, #16]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #80]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #12]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #6]
-; ARM6-NEXT: ldrb r1, [sp, #12]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #76]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #8]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #5]
-; ARM6-NEXT: ldrb r1, [sp, #8]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #72]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp, #4]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #4]
-; ARM6-NEXT: ldrb r1, [sp, #4]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #68]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrb r12, [sp]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #3]
-; ARM6-NEXT: ldrb r1, [sp]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrb r1, [sp, #64]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #2]
-; ARM6-NEXT: mvn r1, r3
-; ARM6-NEXT: ldrb r3, [sp, #60]
-; ARM6-NEXT: sub r1, r3, r1
+; ARM6-NEXT: ldrb r1, [sp, #60]
+; ARM6-NEXT: add r1, r1, r3
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0, #1]
-; ARM6-NEXT: mvn r1, r2
-; ARM6-NEXT: ldrb r2, [sp, #56]
-; ARM6-NEXT: sub r1, r2, r1
+; ARM6-NEXT: ldrb r1, [sp, #56]
+; ARM6-NEXT: add r1, r1, r2
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strb r1, [r0]
 ; ARM6-NEXT: bx lr
 ;
@@ -199,82 +209,82 @@
 ; THUMB6: @ %bb.0:
 ; THUMB6-NEXT: push {r4, lr}
 ; THUMB6-NEXT: ldr r1, [sp, #60]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #124]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #15]
 ; THUMB6-NEXT: ldr r1, [sp, #56]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #120]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #14]
 ; THUMB6-NEXT: ldr r1, [sp, #52]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #116]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #13]
 ; THUMB6-NEXT: ldr r1, [sp, #48]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #112]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #12]
 ; THUMB6-NEXT: ldr r1, [sp, #44]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #108]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #11]
 ; THUMB6-NEXT: ldr r1, [sp, #40]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #104]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #10]
 ; THUMB6-NEXT: ldr r1, [sp, #36]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #100]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #9]
 ; THUMB6-NEXT: ldr r1, [sp, #32]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #96]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #8]
 ; THUMB6-NEXT: ldr r1, [sp, #28]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #92]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #7]
 ; THUMB6-NEXT: ldr r1, [sp, #24]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #88]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #6]
 ; THUMB6-NEXT: ldr r1, [sp, #20]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #84]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #5]
 ; THUMB6-NEXT: ldr r1, [sp, #16]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #80]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #4]
 ; THUMB6-NEXT: ldr r1, [sp, #12]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #76]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #3]
 ; THUMB6-NEXT: ldr r1, [sp, #8]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #72]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #2]
-; THUMB6-NEXT: mvns r1, r3
-; THUMB6-NEXT: ldr r3, [sp, #68]
-; THUMB6-NEXT: subs r1, r3, r1
+; THUMB6-NEXT: ldr r1, [sp, #68]
+; THUMB6-NEXT: adds r1, r1, r3
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0, #1]
-; THUMB6-NEXT: mvns r1, r2
-; THUMB6-NEXT: ldr r2, [sp, #64]
-; THUMB6-NEXT: subs r1, r2, r1
+; THUMB6-NEXT: ldr r1, [sp, #64]
+; THUMB6-NEXT: adds r1, r1, r2
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strb r1, [r0]
 ; THUMB6-NEXT: pop {r4, pc}
 ;
@@ -297,43 +307,43 @@
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i16:
 ; ARM6: @ %bb.0:
-; ARM6-NEXT: ldrh r1, [sp, #20]
-; ARM6-NEXT: mvn r12, r1
+; ARM6-NEXT: ldrh r12, [sp, #20]
 ; ARM6-NEXT: ldrh r1, [sp, #52]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrh r12, [sp, #16]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #14]
-; ARM6-NEXT: ldrh r1, [sp, #16]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrh r1, [sp, #48]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrh r12, [sp, #12]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #12]
-; ARM6-NEXT: ldrh r1, [sp, #12]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrh r1, [sp, #44]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrh r12, [sp, #8]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #10]
-; ARM6-NEXT: ldrh r1, [sp, #8]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrh r1, [sp, #40]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrh r12, [sp, #4]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #8]
-; ARM6-NEXT: ldrh r1, [sp, #4]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrh r1, [sp, #36]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: ldrh r12, [sp]
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #6]
-; ARM6-NEXT: ldrh r1, [sp]
-; ARM6-NEXT: mvn r12, r1
 ; ARM6-NEXT: ldrh r1, [sp, #32]
-; ARM6-NEXT: sub r1, r1, r12
+; ARM6-NEXT: add r1, r1, r12
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #4]
-; ARM6-NEXT: mvn r1, r3
-; ARM6-NEXT: ldrh r3, [sp, #28]
-; ARM6-NEXT: sub r1, r3, r1
+; ARM6-NEXT: ldrh r1, [sp, #28]
+; ARM6-NEXT: add r1, r1, r3
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0, #2]
-; ARM6-NEXT: mvn r1, r2
-; ARM6-NEXT: ldrh r2, [sp, #24]
-; ARM6-NEXT: sub r1, r2, r1
+; ARM6-NEXT: ldrh r1, [sp, #24]
+; ARM6-NEXT: add r1, r1, r2
+; ARM6-NEXT: add r1, r1, #1
 ; ARM6-NEXT: strh r1, [r0]
 ; ARM6-NEXT: bx lr
 ;
@@ -353,42 +363,42 @@
 ; THUMB6: @ %bb.0:
 ; THUMB6-NEXT: push {r4, lr}
 ; THUMB6-NEXT: ldr r1, [sp, #28]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #60]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #14]
 ; THUMB6-NEXT: ldr r1, [sp, #24]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #56]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #12]
 ; THUMB6-NEXT: ldr r1, [sp, #20]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #52]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #10]
 ; THUMB6-NEXT: ldr r1, [sp, #16]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #48]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #8]
 ; THUMB6-NEXT: ldr r1, [sp, #12]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #44]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #6]
 ; THUMB6-NEXT: ldr r1, [sp, #8]
-; THUMB6-NEXT: mvns r1, r1
 ; THUMB6-NEXT: ldr r4, [sp, #40]
-; THUMB6-NEXT: subs r1, r4, r1
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #4]
-; THUMB6-NEXT: mvns r1, r3
-; THUMB6-NEXT: ldr r3, [sp, #36]
-; THUMB6-NEXT: subs r1, r3, r1
+; THUMB6-NEXT: ldr r1, [sp, #36]
+; THUMB6-NEXT: adds r1, r1, r3
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0, #2]
-; THUMB6-NEXT: mvns r1, r2
-; THUMB6-NEXT: ldr r2, [sp, #32]
-; THUMB6-NEXT: subs r1, r2, r1
+; THUMB6-NEXT: ldr r1, [sp, #32]
+; THUMB6-NEXT: adds r1, r1, r2
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: strh r1, [r0]
 ; THUMB6-NEXT: pop {r4, pc}
 ;
@@ -411,18 +421,18 @@
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i32:
 ; ARM6: @ %bb.0:
-; ARM6-NEXT: mvn r12, r0
-; ARM6-NEXT: ldr r0, [sp]
-; ARM6-NEXT: sub r0, r0, r12
-; ARM6-NEXT: mvn r12, r1
-; ARM6-NEXT: ldr r1, [sp, #4]
-; ARM6-NEXT: sub r1, r1, r12
-; ARM6-NEXT: mvn r12, r2
-; ARM6-NEXT: ldr r2, [sp, #8]
-; ARM6-NEXT: sub r2, r2, r12
-; ARM6-NEXT: mvn r12, r3
-; ARM6-NEXT: ldr r3, [sp, #12]
-; ARM6-NEXT: sub r3, r3, r12
+; ARM6-NEXT: ldr r12, [sp]
+; ARM6-NEXT: add r0, r12, r0
+; ARM6-NEXT: ldr r12, [sp, #4]
+; ARM6-NEXT: add r0, r0, #1
+; ARM6-NEXT: add r1, r12, r1
+; ARM6-NEXT: ldr r12, [sp, #8]
+; ARM6-NEXT: add r1, r1, #1
+; ARM6-NEXT: add r2, r12, r2
+; ARM6-NEXT: ldr r12, [sp, #12]
+; ARM6-NEXT: add r2, r2, #1
+; ARM6-NEXT: add r3, r12, r3
+; ARM6-NEXT: add r3, r3, #1
 ; ARM6-NEXT: bx lr
 ;
 ; ARM78-LABEL: vector_i128_i32:
@@ -440,18 +450,18 @@
 ; THUMB6-LABEL: vector_i128_i32:
 ; THUMB6: @ %bb.0:
 ; THUMB6-NEXT: push {r4, lr}
-; THUMB6-NEXT: mvns r0, r0
 ; THUMB6-NEXT: ldr r4, [sp, #8]
-; THUMB6-NEXT: subs r0, r4, r0
-; THUMB6-NEXT: mvns r1, r1
+; THUMB6-NEXT: adds r0, r4, r0
+; THUMB6-NEXT: adds r0, r0, #1
 ; THUMB6-NEXT: ldr r4, [sp, #12]
-; THUMB6-NEXT: subs r1, r4, r1
-; THUMB6-NEXT: mvns r2, r2
+; THUMB6-NEXT: adds r1, r4, r1
+; THUMB6-NEXT: adds r1, r1, #1
 ; THUMB6-NEXT: ldr r4, [sp, #16]
-; THUMB6-NEXT: subs r2, r4, r2
-; THUMB6-NEXT: mvns r3, r3
+; THUMB6-NEXT: adds r2, r4, r2
+; THUMB6-NEXT: adds r2, r2, #1
 ; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: subs r3, r4, r3
+; THUMB6-NEXT: adds r3, r4, r3
+; THUMB6-NEXT: adds r3, r3, #1
 ; THUMB6-NEXT: pop {r4, pc}
 ;
 ; THUMB78-LABEL: vector_i128_i32:
@@ -474,18 +484,18 @@
 ; ARM6-LABEL: vector_i128_i64:
 ; ARM6: @ %bb.0:
 ; ARM6-NEXT: push {r11, lr}
-; ARM6-NEXT: mvn lr, r1
-; ARM6-NEXT: ldr r1, [sp, #8]
-; ARM6-NEXT: mvn r0, r0
+; ARM6-NEXT: ldr lr, [sp, #8]
 ; ARM6-NEXT: ldr r12, [sp, #12]
-; ARM6-NEXT: subs r0, r1, r0
-; ARM6-NEXT: mvn r2, r2
-; ARM6-NEXT: sbc r1, r12, lr
-; ARM6-NEXT: mvn lr, r3
-; ARM6-NEXT: ldr r3, [sp, #16]
+; ARM6-NEXT: adds r0, lr, r0
+; ARM6-NEXT: ldr lr, [sp, #16]
+; ARM6-NEXT: adc r1, r12, r1
+; ARM6-NEXT: adds r0, r0, #1
 ; ARM6-NEXT: ldr r12, [sp, #20]
-; ARM6-NEXT: subs r2, r3, r2
-; ARM6-NEXT: sbc r3, r12, lr
+; ARM6-NEXT: adc r1, r1, #0
+; ARM6-NEXT: adds r2, lr, r2
+; ARM6-NEXT: adc r3, r12, r3
+; ARM6-NEXT: adds r2, r2, #1
+; ARM6-NEXT: adc r3, r3, #0
 ; ARM6-NEXT: pop {r11, pc}
 ;
 ; ARM78-LABEL: vector_i128_i64:
Index: test/CodeGen/PowerPC/inc-of-add.ll
===================================================================
--- test/CodeGen/PowerPC/inc-of-add.ll
+++ test/CodeGen/PowerPC/inc-of-add.ll
@@ -64,89 +64,89 @@
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32: # %bb.0:
 ; PPC32-NEXT: stwu 1, -64(1)
-; PPC32-NEXT: lbz 12, 175(1)
-; PPC32-NEXT: lbz 0, 111(1)
-; PPC32-NEXT: lbz 4, 171(1)
-; PPC32-NEXT: lbz 11, 107(1)
 ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 12, 0, 12
-; PPC32-NEXT: lbz 22, 167(1)
-; PPC32-NEXT: lbz 21, 103(1)
-; PPC32-NEXT: lbz 23, 163(1)
-; PPC32-NEXT: lbz 0, 99(1)
+; PPC32-NEXT: lbz 4, 119(1)
+; PPC32-NEXT: lbz 11, 115(1)
 ; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 4, 11, 4
-; PPC32-NEXT: lbz 24, 159(1)
-; PPC32-NEXT: lbz 11, 95(1)
-; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 22, 21, 22
-; PPC32-NEXT: lbz 25, 155(1)
-; PPC32-NEXT: lbz 21, 91(1)
-; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 23, 0, 23
-; PPC32-NEXT: lbz 26, 151(1)
-; PPC32-NEXT: lbz 0, 87(1)
-; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 11, 11, 24
-; PPC32-NEXT: lbz 27, 147(1)
+; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 4, 4, 6
+; PPC32-NEXT: lbz 21, 123(1)
+; PPC32-NEXT: lbz 6, 131(1)
+; PPC32-NEXT: add 5, 11, 5
+; PPC32-NEXT: lbz 11, 127(1)
+; PPC32-NEXT: add 7, 21, 7
+; PPC32-NEXT: lbz 21, 135(1)
 ; PPC32-NEXT: lbz 24, 83(1)
+; PPC32-NEXT: lbz 23, 79(1)
+; PPC32-NEXT: add 6, 6, 9
+; PPC32-NEXT: add 10, 21, 10
+; PPC32-NEXT: lbz 21, 147(1)
+; PPC32-NEXT: lbz 9, 143(1)
+; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 8, 11, 8
+; PPC32-NEXT: lbz 22, 75(1)
+; PPC32-NEXT: lbz 11, 139(1)
+; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 24, 21, 24
+; PPC32-NEXT: lbz 27, 95(1)
+; PPC32-NEXT: lbz 21, 159(1)
+; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 9, 9, 23
+; PPC32-NEXT: lbz 26, 91(1)
+; PPC32-NEXT: lbz 23, 155(1)
+; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 11, 11, 22
+; PPC32-NEXT: lbz 25, 87(1)
+; PPC32-NEXT: lbz 22, 151(1)
+; PPC32-NEXT: lbz 12, 111(1)
+; PPC32-NEXT: add 27, 21, 27
+; PPC32-NEXT: lbz 21, 175(1)
 ; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT: addi 4, 4, 1
-; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 25, 21, 25
-; PPC32-NEXT: lbz 28, 143(1)
-; PPC32-NEXT: lbz 21, 79(1)
-; PPC32-NEXT: stb 4, 14(3)
-; PPC32-NEXT: addi 4, 22, 1
-; PPC32-NEXT: lbz 29, 139(1)
-; PPC32-NEXT: add 26, 0, 26
-; PPC32-NEXT: lbz 0, 75(1)
-; PPC32-NEXT: stb 4, 13(3)
-; PPC32-NEXT: addi 4, 23, 1
-; PPC32-NEXT: add 27, 24, 27
-; PPC32-NEXT: lbz 24, 135(1)
-; PPC32-NEXT: stb 4, 12(3)
-; PPC32-NEXT: addi 4, 11, 1
-; PPC32-NEXT: stb 4, 11(3)
-; PPC32-NEXT: addi 4, 25, 1
-; PPC32-NEXT: add 28, 21, 28
-; PPC32-NEXT: lbz 21, 131(1)
-; PPC32-NEXT: stb 4, 10(3)
-; PPC32-NEXT: addi 4, 26, 1
-; PPC32-NEXT: add 29, 0, 29
-; PPC32-NEXT: lbz 0, 127(1)
-; PPC32-NEXT: stb 4, 9(3)
-; PPC32-NEXT: addi 4, 27, 1
-; PPC32-NEXT: add 10, 10, 24
-; PPC32-NEXT: lbz 24, 123(1)
-; PPC32-NEXT: stb 4, 8(3)
-; PPC32-NEXT: addi 4, 28, 1
-; PPC32-NEXT: lbz 30, 119(1)
-; PPC32-NEXT: stb 4, 7(3)
-; PPC32-NEXT: addi 4, 29, 1
-; PPC32-NEXT: add 9, 9, 21
-; PPC32-NEXT: lbz 21, 115(1)
-; PPC32-NEXT: stb 4, 6(3)
-; PPC32-NEXT: addi 4, 10, 1
-; PPC32-NEXT: add 8, 8, 0
-; PPC32-NEXT: stb 4, 5(3)
-; PPC32-NEXT: addi 4, 9, 1
-; PPC32-NEXT: add 7, 7, 24
-; PPC32-NEXT: stb 4, 4(3)
-; PPC32-NEXT: addi 4, 8, 1
-; PPC32-NEXT: add 6, 6, 30
-; PPC32-NEXT: stb 4, 3(3)
-; PPC32-NEXT: addi 4, 7, 1
-; PPC32-NEXT: add 5, 5, 21
-; PPC32-NEXT: stb 4, 2(3)
-; PPC32-NEXT: addi 4, 6, 1
+; PPC32-NEXT: lbz 0, 107(1)
+; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 26, 23, 26
+; PPC32-NEXT: lbz 30, 171(1)
+; PPC32-NEXT: lbz 29, 103(1)
+; PPC32-NEXT: lbz 23, 167(1)
+; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 25, 22, 25
+; PPC32-NEXT: lbz 28, 99(1)
+; PPC32-NEXT: lbz 22, 163(1)
+; PPC32-NEXT: add 12, 21, 12
+; PPC32-NEXT: add 30, 30, 0
 ; PPC32-NEXT: addi 12, 12, 1
+; PPC32-NEXT: add 29, 23, 29
+; PPC32-NEXT: stb 12, 15(3)
+; PPC32-NEXT: addi 12, 30, 1
+; PPC32-NEXT: add 28, 22, 28
+; PPC32-NEXT: stb 12, 14(3)
+; PPC32-NEXT: addi 12, 29, 1
+; PPC32-NEXT: stb 12, 13(3)
+; PPC32-NEXT: addi 12, 28, 1
+; PPC32-NEXT: stb 12, 12(3)
+; PPC32-NEXT: addi 12, 27, 1
+; PPC32-NEXT: stb 12, 11(3)
+; PPC32-NEXT: addi 12, 26, 1
+; PPC32-NEXT: addi 9, 9, 1
+; PPC32-NEXT: addi 6, 6, 1
+; PPC32-NEXT: stb 12, 10(3)
+; PPC32-NEXT: addi 12, 25, 1
+; PPC32-NEXT: stb 9, 7(3)
+; PPC32-NEXT: addi 9, 11, 1
+; PPC32-NEXT: stb 6, 4(3)
+; PPC32-NEXT: addi 6, 8, 1
+; PPC32-NEXT: addi 4, 4, 1
+; PPC32-NEXT: stb 12, 9(3)
+; PPC32-NEXT: addi 12, 24, 1
+; PPC32-NEXT: stb 9, 6(3)
+; PPC32-NEXT: addi 9, 10, 1
+; PPC32-NEXT: stb 6, 3(3)
+; PPC32-NEXT: addi 6, 7, 1
 ; PPC32-NEXT: stb 4, 1(3)
 ; PPC32-NEXT: addi 4, 5, 1
-; PPC32-NEXT: stb 12, 15(3)
+; PPC32-NEXT: stb 12, 8(3)
+; PPC32-NEXT: stb 9, 5(3)
+; PPC32-NEXT: stb 6, 2(3)
 ; PPC32-NEXT: stb 4, 0(3)
 ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
@@ -164,73 +164,72 @@
 ; PPC64BE-LABEL: vector_i128_i8:
 ; PPC64BE: # %bb.0:
 ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lbz 11, 303(1)
-; PPC64BE-NEXT: lbz 12, 175(1)
-; PPC64BE-NEXT: lbz 0, 311(1)
-; PPC64BE-NEXT: lbz 30, 183(1)
-; PPC64BE-NEXT: lbz 22, 295(1)
-; PPC64BE-NEXT: lbz 21, 167(1)
+; PPC64BE-NEXT: lbz 21, 207(1)
+; PPC64BE-NEXT: lbz 11, 199(1)
+; PPC64BE-NEXT: lbz 12, 191(1)
 ; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 2, -96(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: add 11, 12, 11
-; PPC64BE-NEXT: lbz 25, 271(1)
-; PPC64BE-NEXT: lbz 24, 279(1)
-; PPC64BE-NEXT: lbz 23, 287(1)
-; PPC64BE-NEXT: add 2, 30, 0
-; PPC64BE-NEXT: lbz 0, 159(1)
-; PPC64BE-NEXT: lbz 12, 151(1)
-; PPC64BE-NEXT: add 30, 21, 22
-; PPC64BE-NEXT: lbz 22, 143(1)
+; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: add 23, 0, 23
-; PPC64BE-NEXT: lbz 28, 247(1)
-; PPC64BE-NEXT: lbz 27, 255(1)
-; PPC64BE-NEXT: lbz 26, 263(1)
-; PPC64BE-NEXT: lbz 0, 135(1)
-; PPC64BE-NEXT: add 12, 12, 24
-; PPC64BE-NEXT: lbz 24, 127(1)
-; PPC64BE-NEXT: add 25, 22, 25
-; PPC64BE-NEXT: lbz 22, 119(1)
-; PPC64BE-NEXT: add 26, 0, 26
-; PPC64BE-NEXT: lbz 0, 239(1)
-; PPC64BE-NEXT: add 27, 24, 27
-; PPC64BE-NEXT: lbz 24, 231(1)
-; PPC64BE-NEXT: add 28, 22, 28
-; PPC64BE-NEXT: lbz 22, 223(1)
-; PPC64BE-NEXT: addi 11, 11, 1
+; PPC64BE-NEXT: std 2, -96(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: stb 11, 14(3)
-; PPC64BE-NEXT: addi 11, 30, 1
-; PPC64BE-NEXT: lbz 29, 199(1)
-; PPC64BE-NEXT: add 10, 10, 0
-; PPC64BE-NEXT: lbz 0, 215(1)
-; PPC64BE-NEXT: add 9, 9, 24
-; PPC64BE-NEXT: lbz 24, 207(1)
-; PPC64BE-NEXT: add 8, 8, 22
-; PPC64BE-NEXT: lbz 22, 191(1)
-; PPC64BE-NEXT: stb 11, 13(3)
-; PPC64BE-NEXT: addi 11, 23, 1
-; PPC64BE-NEXT: stb 11, 12(3)
-; PPC64BE-NEXT: addi 11, 12, 1
-; PPC64BE-NEXT: stb 11, 11(3)
-; PPC64BE-NEXT: addi 11, 25, 1
-; PPC64BE-NEXT: stb 11, 10(3)
-; PPC64BE-NEXT: addi 11, 26, 1
-; PPC64BE-NEXT: add 7, 7, 0
-; PPC64BE-NEXT: add 6, 6, 24
-; PPC64BE-NEXT: add 5, 5, 29
-; PPC64BE-NEXT: add 4, 4, 22
-; PPC64BE-NEXT: stb 11, 9(3)
-; PPC64BE-NEXT: addi 11, 27, 1
-; PPC64BE-NEXT: addi 0, 2, 1
-; PPC64BE-NEXT: stb 11, 8(3)
-; PPC64BE-NEXT: addi 11, 28, 1
+; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: lbz 0, 183(1)
+; PPC64BE-NEXT: add 6, 21, 6
+; PPC64BE-NEXT: lbz 21, 231(1)
+; PPC64BE-NEXT: add 5, 11, 5
+; PPC64BE-NEXT: lbz 11, 223(1)
+; PPC64BE-NEXT: add 4, 12, 4
+; PPC64BE-NEXT: lbz 12, 215(1)
+; PPC64BE-NEXT: lbz 23, 127(1)
+; PPC64BE-NEXT: add 9, 21, 9
+; PPC64BE-NEXT: lbz 21, 255(1)
+; PPC64BE-NEXT: lbz 22, 119(1)
+; PPC64BE-NEXT: add 8, 11, 8
+; PPC64BE-NEXT: lbz 11, 247(1)
+; PPC64BE-NEXT: add 7, 12, 7
+; PPC64BE-NEXT: lbz 12, 239(1)
+; PPC64BE-NEXT: lbz 26, 151(1)
+; PPC64BE-NEXT: add 2, 21, 23
+; PPC64BE-NEXT: lbz 23, 279(1)
+; PPC64BE-NEXT: lbz 25, 143(1)
+; PPC64BE-NEXT: add 11, 11, 22
+; PPC64BE-NEXT: lbz 22, 271(1)
+; PPC64BE-NEXT: lbz 24, 135(1)
+; PPC64BE-NEXT: add 10, 12, 10
+; PPC64BE-NEXT: lbz 12, 263(1)
+; PPC64BE-NEXT: lbz 30, 175(1)
+; PPC64BE-NEXT: lbz 29, 303(1)
+; PPC64BE-NEXT: add 26, 23, 26
+; PPC64BE-NEXT: lbz 23, 311(1)
+; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: add 25, 22, 25
+; PPC64BE-NEXT: lbz 28, 167(1)
+; PPC64BE-NEXT: lbz 22, 295(1)
+; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: add 12, 12, 24
+; PPC64BE-NEXT: lbz 27, 159(1)
+; PPC64BE-NEXT: lbz 24, 287(1)
+; PPC64BE-NEXT: add 30, 29, 30
+; PPC64BE-NEXT: add 29, 23, 0
+; PPC64BE-NEXT: addi 0, 29, 1
+; PPC64BE-NEXT: add 28, 22, 28
+; PPC64BE-NEXT: stb 0, 15(3)
+; PPC64BE-NEXT: addi 0, 30, 1
+; PPC64BE-NEXT: add 27, 24, 27
+; PPC64BE-NEXT: stb 0, 14(3)
+; PPC64BE-NEXT: addi 0, 28, 1
+; PPC64BE-NEXT: stb 0, 13(3)
+; PPC64BE-NEXT: addi 0, 27, 1
+; PPC64BE-NEXT: stb 0, 12(3)
+; PPC64BE-NEXT: addi 0, 26, 1
+; PPC64BE-NEXT: addi 12, 12, 1
+; PPC64BE-NEXT: stb 0, 11(3)
+; PPC64BE-NEXT: addi 0, 25, 1
+; PPC64BE-NEXT: stb 12, 9(3)
+; PPC64BE-NEXT: addi 12, 2, 1
+; PPC64BE-NEXT: addi 11, 11, 1
 ; PPC64BE-NEXT: addi 10, 10, 1
 ; PPC64BE-NEXT: addi 9, 9, 1
 ; PPC64BE-NEXT: addi 8, 8, 1
@@ -238,7 +237,8 @@
 ; PPC64BE-NEXT: addi 6, 6, 1
 ; PPC64BE-NEXT: addi 5, 5, 1
 ; PPC64BE-NEXT: addi 4, 4, 1
-; PPC64BE-NEXT: stb 0, 15(3)
+; PPC64BE-NEXT: stb 0, 10(3)
+; PPC64BE-NEXT: stb 12, 8(3)
 ; PPC64BE-NEXT: stb 11, 7(3)
 ; PPC64BE-NEXT: stb 10, 6(3)
 ; PPC64BE-NEXT: stb 9, 5(3)
@@ -262,9 +262,8 @@
 ;
 ; PPC64LE-LABEL: vector_i128_i8:
 ; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: vspltisb 4, 1
-; PPC64LE-NEXT: vaddubm 2, 2, 3
-; PPC64LE-NEXT: vaddubm 2, 2, 4
+; PPC64LE-NEXT: xxlnor 34, 34, 34
+; PPC64LE-NEXT: vsububm 2, 3, 2
 ; PPC64LE-NEXT: blr
   %t0 = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %t1 = add <16 x i8> %y, %t0
@@ -280,22 +279,22 @@
 ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT: lhz 11, 62(1)
-; PPC32-NEXT: lhz 12, 66(1)
-; PPC32-NEXT: lhz 0, 70(1)
-; PPC32-NEXT: lhz 30, 42(1)
-; PPC32-NEXT: lhz 29, 46(1)
-; PPC32-NEXT: lhz 28, 50(1)
-; PPC32-NEXT: lhz 27, 54(1)
-; PPC32-NEXT: lhz 26, 58(1)
-; PPC32-NEXT: add 10, 10, 0
-; PPC32-NEXT: add 9, 9, 12
-; PPC32-NEXT: add 8, 8, 11
-; PPC32-NEXT: add 7, 7, 26
-; PPC32-NEXT: add 6, 6, 27
-; PPC32-NEXT: add 5, 5, 28
-; PPC32-NEXT: add 4, 4, 29
-; PPC32-NEXT: add 3, 3, 30
+; PPC32-NEXT: lhz 11, 50(1)
+; PPC32-NEXT: lhz 12, 46(1)
+; PPC32-NEXT: lhz 0, 42(1)
+; PPC32-NEXT: lhz 30, 70(1)
+; PPC32-NEXT: lhz 29, 66(1)
+; PPC32-NEXT: lhz 28, 62(1)
+; PPC32-NEXT: lhz 27, 58(1)
+; PPC32-NEXT: lhz 26, 54(1)
+; PPC32-NEXT: add 3, 0, 3
+; PPC32-NEXT: add 4, 12, 4
+; PPC32-NEXT: add 5, 11, 5
+; PPC32-NEXT: add 6, 26, 6
+; PPC32-NEXT: add 7, 27, 7
+; PPC32-NEXT: add 8, 28, 8
+; PPC32-NEXT: add 9, 29, 9
+; PPC32-NEXT: add 10, 30, 10
 ; PPC32-NEXT: addi 3, 3, 1
 ; PPC32-NEXT: addi 4, 4, 1
 ; PPC32-NEXT: addi 5, 5, 1
@@ -320,24 +319,23 @@
 ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lhz 11, 166(1)
-; PPC64BE-NEXT: lhz 12, 174(1)
-; PPC64BE-NEXT: lhz 0, 182(1)
 ; PPC64BE-NEXT: lhz 30, 118(1)
-; PPC64BE-NEXT: lhz 29, 126(1)
-; PPC64BE-NEXT: lhz 28, 134(1)
-; PPC64BE-NEXT: lhz 27, 142(1)
-; PPC64BE-NEXT: lhz 26, 150(1)
-; PPC64BE-NEXT: lhz 25, 158(1)
-; PPC64BE-NEXT: std 2, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: add 2, 30, 0
-; PPC64BE-NEXT: add 10, 10, 12
-; PPC64BE-NEXT: add 9, 9, 11
-; PPC64BE-NEXT: add 8, 8, 25
-; PPC64BE-NEXT: add 7, 7, 26
-; PPC64BE-NEXT: add 6, 6, 27
-; PPC64BE-NEXT: add 5, 5, 28
-; PPC64BE-NEXT: add 4, 4, 29
+; PPC64BE-NEXT: lhz 11, 142(1)
+; PPC64BE-NEXT: lhz 12, 134(1)
+; PPC64BE-NEXT: lhz 0, 126(1)
+; PPC64BE-NEXT: lhz 29, 182(1)
+; PPC64BE-NEXT: lhz 28, 174(1)
+; PPC64BE-NEXT: lhz 27, 166(1)
+; PPC64BE-NEXT: lhz 26, 158(1)
+; PPC64BE-NEXT: lhz 25, 150(1)
+; PPC64BE-NEXT: add 4, 0, 4
+; PPC64BE-NEXT: add 5, 12, 5
+; PPC64BE-NEXT: add 6, 11, 6
+; PPC64BE-NEXT: add 7, 25, 7
+; PPC64BE-NEXT: add 8, 26, 8
+; PPC64BE-NEXT: add 9, 27, 9
+; PPC64BE-NEXT: add 10, 28, 10
+; PPC64BE-NEXT: add 11, 29, 30
 ; PPC64BE-NEXT: addi 4, 4, 1
 ; PPC64BE-NEXT: addi 5, 5, 1
 ; PPC64BE-NEXT: addi 6, 6, 1
@@ -345,7 +343,7 @@
 ; PPC64BE-NEXT: addi 8, 8, 1
 ; PPC64BE-NEXT: addi 9, 9, 1
 ; PPC64BE-NEXT: addi 10, 10, 1
-; PPC64BE-NEXT: addi 11, 2, 1
+; PPC64BE-NEXT: addi 11, 11, 1
 ; PPC64BE-NEXT: sth 11, 14(3)
 ; PPC64BE-NEXT: sth 10, 12(3)
 ; PPC64BE-NEXT: sth 9, 10(3)
@@ -354,7 +352,6 @@
 ; PPC64BE-NEXT: sth 6, 4(3)
 ; PPC64BE-NEXT: sth 5, 2(3)
 ; PPC64BE-NEXT: sth 4, 0(3)
-; PPC64BE-NEXT: ld 2, -64(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload
@@ -365,9 +362,8 @@
 ;
 ; PPC64LE-LABEL: vector_i128_i16:
 ; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: vspltish 4, 1
-; PPC64LE-NEXT: vadduhm 2, 2, 3
-; PPC64LE-NEXT: vadduhm 2, 2, 4
+; PPC64LE-NEXT: xxlnor 34, 34, 34
+; PPC64LE-NEXT: vsubuhm 2, 3, 2
 ; PPC64LE-NEXT: blr
   %t0 = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %t1 = add <8 x i16> %y, %t0
@@ -377,10 +373,10 @@
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i32:
 ; PPC32: # %bb.0:
-; PPC32-NEXT: add 6, 6, 10
-; PPC32-NEXT: add 5, 5, 9
-; PPC32-NEXT: add 4, 4, 8
-; PPC32-NEXT: add 3, 3, 7
+; PPC32-NEXT: add 3, 7, 3
+; PPC32-NEXT: add 4, 8, 4
+; PPC32-NEXT: add 5, 9, 5
+; PPC32-NEXT: add 6, 10, 6
 ; PPC32-NEXT: addi 3, 3, 1
 ; PPC32-NEXT: addi 4, 4, 1
 ; PPC32-NEXT: addi 5, 5, 1
@@ -389,10 +385,10 @@
 ;
 ; PPC64BE-LABEL: vector_i128_i32:
 ; PPC64BE: # %bb.0:
-; PPC64BE-NEXT: add 3, 3, 7
-; PPC64BE-NEXT: add 4, 4, 8
-; PPC64BE-NEXT: add 5, 5, 9
-; PPC64BE-NEXT: add 6, 6, 10
+; PPC64BE-NEXT: add 6, 10, 6
+; PPC64BE-NEXT: add 5, 9, 5
+; PPC64BE-NEXT: add 4, 8, 4
+; PPC64BE-NEXT: add 3, 7, 3
 ; PPC64BE-NEXT: addi 6, 6, 1
 ; PPC64BE-NEXT: addi 5, 5, 1
 ; PPC64BE-NEXT: addi 4, 4, 1
@@ -401,9 +397,8 @@
 ;
 ; PPC64LE-LABEL: vector_i128_i32:
 ; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: vspltisw 4, 1
-; PPC64LE-NEXT: vadduwm 2, 2, 3
-; PPC64LE-NEXT: vadduwm 2, 2, 4
+; PPC64LE-NEXT: xxlnor 34, 34, 34
+; PPC64LE-NEXT: vsubuwm 2, 3, 2
 ; PPC64LE-NEXT: blr
   %t0 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %t1 = add <4 x i32> %y, %t0
@@ -413,32 +408,28 @@
 define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i64:
 ; PPC32: # %bb.0:
-; PPC32-NEXT: addc 6, 6, 10
-; PPC32-NEXT: adde 5, 5, 9
-; PPC32-NEXT: addc 4, 4, 8
-; PPC32-NEXT: adde 3, 3, 7
-; PPC32-NEXT: addic 4, 4, 1
-; PPC32-NEXT: addze 3, 3
-; PPC32-NEXT: addic 6, 6, 1
-; PPC32-NEXT: addze 5, 5
+; PPC32-NEXT: nor 4, 4, 4
+; PPC32-NEXT: nor 3, 3, 3
+; PPC32-NEXT: subfc 4, 4, 8
+; PPC32-NEXT: nor 6, 6, 6
+; PPC32-NEXT: subfe 3, 3, 7
+; PPC32-NEXT: nor 5, 5, 5
+; PPC32-NEXT: subfc 6, 6, 10
+; PPC32-NEXT: subfe 5, 5, 9
 ; PPC32-NEXT: blr
 ;
 ; PPC64BE-LABEL: vector_i128_i64:
 ; PPC64BE: # %bb.0:
-; PPC64BE-NEXT: add 4, 4, 6
-; PPC64BE-NEXT: add 3, 3, 5
+; PPC64BE-NEXT: add 3, 5, 3
+; PPC64BE-NEXT: add 4, 6, 4
 ; PPC64BE-NEXT: addi 3, 3, 1
 ; PPC64BE-NEXT: addi 4, 4, 1
 ; PPC64BE-NEXT: blr
 ;
 ; PPC64LE-LABEL: vector_i128_i64:
 ; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; PPC64LE-NEXT: vaddudm 2, 2, 3
-; PPC64LE-NEXT: addi 3, 3, .LCPI7_0@toc@l
-; PPC64LE-NEXT: lxvd2x 0, 0, 3
-; PPC64LE-NEXT: xxswapd 36, 0
-; PPC64LE-NEXT: vaddudm 2, 2, 4
+; PPC64LE-NEXT: xxlnor 34, 34, 34
+; PPC64LE-NEXT: vsubudm 2, 3, 2
 ; PPC64LE-NEXT: blr
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0
Index: test/CodeGen/PowerPC/sub-of-not.ll
===================================================================
--- test/CodeGen/PowerPC/sub-of-not.ll
+++ test/CodeGen/PowerPC/sub-of-not.ll
@@ -11,8 +11,8 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; ALL-LABEL: scalar_i8:
 ; ALL: # %bb.0:
-; ALL-NEXT: nor 3, 3, 3
-; ALL-NEXT: subf 3, 3, 4
+; ALL-NEXT: add 3, 4, 3
+; ALL-NEXT: addi 3, 3, 1
 ; ALL-NEXT: blr
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
@@ -22,8 +22,8 @@
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; ALL-LABEL: scalar_i16:
 ; ALL: # %bb.0:
-; ALL-NEXT: nor 3, 3, 3
-; ALL-NEXT: subf 3, 3, 4
+; ALL-NEXT: add 3, 4, 3
+; ALL-NEXT: addi 3, 3, 1
 ; ALL-NEXT: blr
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
@@ -33,8 +33,8 @@
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; ALL-LABEL: scalar_i32:
 ; ALL: # %bb.0:
-; ALL-NEXT: nor 3, 3, 3
-; ALL-NEXT: subf 3, 3, 4
+; ALL-NEXT: add 3, 4, 3
+; ALL-NEXT: addi 3, 3, 1
 ; ALL-NEXT: blr
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
@@ -44,16 +44,16 @@
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; PPC32-LABEL: scalar_i64:
 ; PPC32: # %bb.0:
-; PPC32-NEXT: nor 4, 4, 4
-; PPC32-NEXT: nor 3, 3, 3
-; PPC32-NEXT: subfc 4, 4, 6
-; PPC32-NEXT: subfe 3, 3, 5
+; PPC32-NEXT: addc 4, 6, 4
+; PPC32-NEXT: adde 3, 5, 3
+; PPC32-NEXT: addic 4, 4, 1
+; PPC32-NEXT: addze 3, 3
 ; PPC32-NEXT: blr
 ;
 ; PPC64-LABEL: scalar_i64:
 ; PPC64: # %bb.0:
-; PPC64-NEXT: not 3, 3
-; PPC64-NEXT: sub 3, 4, 3
+; PPC64-NEXT: add 3, 4, 3
+; PPC64-NEXT: addi 3, 3, 1
 ; PPC64-NEXT: blr
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
@@ -63,183 +63,191 @@
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32: # %bb.0:
-; PPC32-NEXT: stwu 1, -48(1)
-; PPC32-NEXT: lbz 4, 99(1)
-; PPC32-NEXT: stw 23, 12(1) # 4-byte Folded Spill
-; PPC32-NEXT: nor 5, 5, 5
-; PPC32-NEXT: lbz 23, 103(1)
-; PPC32-NEXT: subf 4, 5, 4
-; PPC32-NEXT: lbz 5, 107(1)
-; PPC32-NEXT: nor 6, 6, 6
-; PPC32-NEXT: subf 6, 6, 23
-; PPC32-NEXT: lbz 23, 111(1)
-; PPC32-NEXT: nor 7, 7, 7
-; PPC32-NEXT: subf 5, 7, 5
-; PPC32-NEXT: lbz 7, 115(1)
-; PPC32-NEXT: nor 8, 8, 8
-; PPC32-NEXT: stw 24, 16(1) # 4-byte Folded Spill
-; PPC32-NEXT: subf 8, 8, 23
-; PPC32-NEXT: lbz 24, 119(1)
-; PPC32-NEXT: lbz 23, 59(1)
-; PPC32-NEXT: nor 9, 9, 9
-; PPC32-NEXT: stw 25, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT: subf 7, 9, 7
-; PPC32-NEXT: lbz 25, 123(1)
-; PPC32-NEXT: lbz 9, 63(1)
-; PPC32-NEXT: stw 26, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT: nor 10, 10, 10
-; PPC32-NEXT: lbz 26, 127(1)
-; PPC32-NEXT: subf 10, 10, 24
-; PPC32-NEXT: lbz 24, 67(1)
-; PPC32-NEXT: nor 23, 23, 23
-; PPC32-NEXT: stw 27, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT: subf 25, 23, 25
-; PPC32-NEXT: lbz 27, 131(1)
-; PPC32-NEXT: lbz 23, 71(1)
-; PPC32-NEXT: nor 9, 9, 9
-; PPC32-NEXT: stw 28, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT: subf 9, 9, 26
-; PPC32-NEXT: lbz 28, 135(1)
-; PPC32-NEXT: lbz 26, 75(1)
-; PPC32-NEXT: stw 29, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT: nor 24, 24, 24
-; PPC32-NEXT: lbz 29, 139(1)
-; PPC32-NEXT: subf 27, 24, 27
-; PPC32-NEXT: lbz 24, 79(1)
-; PPC32-NEXT: nor 23, 23, 23
-; PPC32-NEXT: stw 30, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT: subf 28, 23, 28
-; PPC32-NEXT: lbz 30, 143(1)
-; PPC32-NEXT: lbz 23, 83(1)
-; PPC32-NEXT: nor 26, 26, 26
-; PPC32-NEXT: lbz 0, 147(1)
-; PPC32-NEXT: subf 29, 26, 29
-; PPC32-NEXT: lbz 26, 87(1)
-; PPC32-NEXT: lbz 12, 151(1)
-; PPC32-NEXT: nor 24, 24, 24
-; PPC32-NEXT: subf 30, 24, 30
-; PPC32-NEXT: lbz 24, 91(1)
-; PPC32-NEXT: nor 23, 23, 23
-; PPC32-NEXT: lbz 11, 155(1)
-; PPC32-NEXT: subf 0, 23, 0
-; PPC32-NEXT: lbz 23, 95(1)
-; PPC32-NEXT: nor 26, 26, 26
-; PPC32-NEXT: subf 12, 26, 12
-; PPC32-NEXT: lbz 26, 159(1)
-; PPC32-NEXT: nor 24, 24, 24
-; PPC32-NEXT: subf 11, 24, 11
-; PPC32-NEXT: nor 24, 23, 23
-; PPC32-NEXT: subf 26, 24, 26
-; PPC32-NEXT: stb 10, 5(3)
-; PPC32-NEXT: stb 7, 4(3)
-; PPC32-NEXT: stb 8, 3(3)
-; PPC32-NEXT: stb 5, 2(3)
-; PPC32-NEXT: stb 6, 1(3)
-; PPC32-NEXT: stb 26, 15(3)
-; PPC32-NEXT: stb 11, 14(3)
+; PPC32-NEXT: stwu 1, -64(1)
+; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill
+; PPC32-NEXT: lbz 4, 119(1)
+; PPC32-NEXT: lbz 11, 115(1)
+; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 4, 4, 6
+; PPC32-NEXT: lbz 21, 123(1)
+; PPC32-NEXT: lbz 6, 131(1)
+; PPC32-NEXT: add 5, 11, 5
+; PPC32-NEXT: lbz 11, 127(1)
+; PPC32-NEXT: add 7, 21, 7
+; PPC32-NEXT: lbz 21, 135(1)
+; PPC32-NEXT: lbz 24, 83(1)
+; PPC32-NEXT: lbz 23, 79(1)
+; PPC32-NEXT: add 6, 6, 9
+; PPC32-NEXT: add 10, 21, 10
+; PPC32-NEXT: lbz 21, 147(1)
+; PPC32-NEXT: lbz 9, 143(1)
+; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 8, 11, 8
+; PPC32-NEXT: lbz 22, 75(1)
+; PPC32-NEXT: lbz 11, 139(1)
+; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 24, 21, 24
+; PPC32-NEXT: lbz 27, 95(1)
+; PPC32-NEXT: lbz 21, 159(1)
+; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 9, 9, 23
+; PPC32-NEXT: lbz 26, 91(1)
+; PPC32-NEXT: lbz 23, 155(1)
+; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 11, 11, 22
+; PPC32-NEXT: lbz 25, 87(1)
+; PPC32-NEXT: lbz 22, 151(1)
+; PPC32-NEXT: lbz 12, 111(1)
+; PPC32-NEXT: add 27, 21, 27
+; PPC32-NEXT: lbz 21, 175(1)
+; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT: lbz 0, 107(1)
+; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 26, 23, 26
+; PPC32-NEXT: lbz 30, 171(1)
+; PPC32-NEXT: lbz 29, 103(1)
+; PPC32-NEXT: lbz 23, 167(1)
+; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT: add 25, 22, 25
+; PPC32-NEXT: lbz 28, 99(1)
+; PPC32-NEXT: lbz 22, 163(1)
+; PPC32-NEXT: add 12, 21, 12
+; PPC32-NEXT: add 30, 30, 0
+; PPC32-NEXT: addi 12, 12, 1
+; PPC32-NEXT: add 29, 23, 29
+; PPC32-NEXT: stb 12, 15(3)
+; PPC32-NEXT: addi 12, 30, 1
+; PPC32-NEXT: add 28, 22, 28
+; PPC32-NEXT: stb 12, 14(3)
+; PPC32-NEXT: addi 12, 29, 1
 ; PPC32-NEXT: stb 12, 13(3)
-; PPC32-NEXT: stb 0, 12(3)
-; PPC32-NEXT: stb 30, 11(3)
-; PPC32-NEXT: stb 29, 10(3)
-; PPC32-NEXT: stb 28, 9(3)
-; PPC32-NEXT: stb 27, 8(3)
+; PPC32-NEXT: addi 12, 28, 1
+; PPC32-NEXT: stb 12, 12(3)
+; PPC32-NEXT: addi 12, 27, 1
+; PPC32-NEXT: stb 12, 11(3)
+; PPC32-NEXT: addi 12, 26, 1
+; PPC32-NEXT: addi 9, 9, 1
+; PPC32-NEXT: addi 6, 6, 1
+; PPC32-NEXT: stb 12, 10(3)
+; PPC32-NEXT: addi 12, 25, 1
 ; PPC32-NEXT: stb 9, 7(3)
-; PPC32-NEXT: stb 25, 6(3)
+; PPC32-NEXT: addi 9, 11, 1
+; PPC32-NEXT: stb 6, 4(3)
+; PPC32-NEXT: addi 6, 8, 1
+; PPC32-NEXT: addi 4, 4, 1
+; PPC32-NEXT: stb 12, 9(3)
+; PPC32-NEXT: addi 12, 24, 1
+; PPC32-NEXT: stb 9, 6(3)
+; PPC32-NEXT: addi 9, 10, 1
+; PPC32-NEXT: stb 6, 3(3)
+; PPC32-NEXT: addi 6, 7, 1
+; PPC32-NEXT: stb 4, 1(3)
+; PPC32-NEXT: addi 4, 5, 1
+; PPC32-NEXT: stb 12, 8(3)
+; PPC32-NEXT: stb 9, 5(3)
+; PPC32-NEXT: stb 6, 2(3)
 ; PPC32-NEXT: stb 4, 0(3)
-; PPC32-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 29, 36(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 28, 32(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 27, 28(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 26, 24(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 25, 20(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 24, 16(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 23, 12(1) # 4-byte Folded Reload
-; PPC32-NEXT: addi 1, 1, 48
+; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
+; PPC32-NEXT: addi 1, 1, 64
 ; PPC32-NEXT: blr
 ;
 ; PPC64BE-LABEL: vector_i128_i8:
 ; PPC64BE: # %bb.0:
-; PPC64BE-NEXT: lbz 11, 191(1)
-; PPC64BE-NEXT: nor 4, 4, 4
+; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: lbz 21, 207(1)
+; PPC64BE-NEXT: lbz 11, 199(1)
+; PPC64BE-NEXT: lbz 12, 191(1)
 ; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lbz 23, 199(1)
-; PPC64BE-NEXT: nor 5, 5, 5
-; PPC64BE-NEXT: subf 4, 4, 11
-; PPC64BE-NEXT: lbz 11, 207(1)
-; PPC64BE-NEXT: nor 6, 6, 6
-; PPC64BE-NEXT: subf 5, 5, 23
-; PPC64BE-NEXT: lbz 23, 215(1)
-; PPC64BE-NEXT: subf 6, 6, 11
-; PPC64BE-NEXT: lbz 11, 223(1)
-; PPC64BE-NEXT: nor 7, 7, 7
-; PPC64BE-NEXT: nor 8, 8, 8
-; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lbz 24, 239(1)
-; PPC64BE-NEXT: subf 7, 7, 23
-; PPC64BE-NEXT: lbz 23, 231(1)
-; PPC64BE-NEXT: subf 8, 8, 11
-; PPC64BE-NEXT: lbz 11, 119(1)
-; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: nor 9, 9, 9
-; PPC64BE-NEXT: lbz 25, 247(1)
-; PPC64BE-NEXT: nor 10, 10, 10
-; PPC64BE-NEXT: subf 9, 9, 23
-; PPC64BE-NEXT: lbz 23, 127(1)
-; PPC64BE-NEXT: subf 10, 10, 24
-; PPC64BE-NEXT: lbz 24, 135(1)
-; PPC64BE-NEXT: nor 11, 11, 11
+; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lbz 27, 263(1)
-; PPC64BE-NEXT: lbz 26, 255(1)
-; PPC64BE-NEXT: subf 11, 11, 25
-; PPC64BE-NEXT: lbz 25, 143(1)
-; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: nor 23, 23, 23
-; PPC64BE-NEXT: lbz 28, 271(1)
-; PPC64BE-NEXT: nor 24, 24, 24
-; PPC64BE-NEXT: subf 26, 23, 26
-; PPC64BE-NEXT: lbz 23, 151(1)
-; PPC64BE-NEXT: subf 27, 24, 27
-; PPC64BE-NEXT: lbz 24, 159(1)
-; PPC64BE-NEXT: nor 25, 25, 25
+; PPC64BE-NEXT: std 2, -96(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT: lbz 30, 287(1)
-; PPC64BE-NEXT: lbz 29, 279(1)
-; PPC64BE-NEXT: subf 28, 25, 28
-; PPC64BE-NEXT: lbz 25, 167(1)
-; PPC64BE-NEXT: lbz 0, 295(1)
-; PPC64BE-NEXT: nor 23, 23, 23
-; PPC64BE-NEXT: nor 24, 24, 24
-; PPC64BE-NEXT: subf 29, 23, 29
-; PPC64BE-NEXT: lbz 23, 175(1)
-; PPC64BE-NEXT: subf 30, 24, 30
-; PPC64BE-NEXT: lbz 24, 183(1)
-; PPC64BE-NEXT: nor 25, 25, 25
-; PPC64BE-NEXT: lbz 12, 303(1)
-; PPC64BE-NEXT: subf 0, 25, 0
-; PPC64BE-NEXT: lbz 25, 311(1)
-; PPC64BE-NEXT: nor 23, 23, 23
-; PPC64BE-NEXT: nor 24, 24, 24
-; PPC64BE-NEXT: subf 12, 23, 12
-; PPC64BE-NEXT: subf 25, 24, 25
+; PPC64BE-NEXT: lbz 0, 183(1)
+; PPC64BE-NEXT: add 6, 21, 6
+; PPC64BE-NEXT: lbz 21, 231(1)
+; PPC64BE-NEXT: add 5, 11, 5
+; PPC64BE-NEXT: lbz 11, 223(1)
+; PPC64BE-NEXT: add 4, 12, 4
+; PPC64BE-NEXT: lbz 12, 215(1)
+; PPC64BE-NEXT: lbz 23, 127(1)
+; PPC64BE-NEXT: add 9, 21, 9
+; PPC64BE-NEXT: lbz 21, 255(1)
+; PPC64BE-NEXT: lbz 22, 119(1)
+; PPC64BE-NEXT: add 8, 11, 8
+; PPC64BE-NEXT: lbz 11, 247(1)
+; PPC64BE-NEXT: add 7, 12, 7
+; PPC64BE-NEXT: lbz 12, 239(1)
+; PPC64BE-NEXT: lbz 26, 151(1)
+; PPC64BE-NEXT: add 2, 21, 23
+; PPC64BE-NEXT: lbz 23, 279(1)
lbz 25, 143(1) +; PPC64BE-NEXT: add 11, 11, 22 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: lbz 24, 135(1) +; PPC64BE-NEXT: add 10, 12, 10 +; PPC64BE-NEXT: lbz 12, 263(1) +; PPC64BE-NEXT: lbz 30, 175(1) +; PPC64BE-NEXT: lbz 29, 303(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 23, 311(1) +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: add 25, 22, 25 +; PPC64BE-NEXT: lbz 28, 167(1) +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: add 12, 12, 24 +; PPC64BE-NEXT: lbz 27, 159(1) +; PPC64BE-NEXT: lbz 24, 287(1) +; PPC64BE-NEXT: add 30, 29, 30 +; PPC64BE-NEXT: add 29, 23, 0 +; PPC64BE-NEXT: addi 0, 29, 1 +; PPC64BE-NEXT: add 28, 22, 28 +; PPC64BE-NEXT: stb 0, 15(3) +; PPC64BE-NEXT: addi 0, 30, 1 +; PPC64BE-NEXT: add 27, 24, 27 +; PPC64BE-NEXT: stb 0, 14(3) +; PPC64BE-NEXT: addi 0, 28, 1 +; PPC64BE-NEXT: stb 0, 13(3) +; PPC64BE-NEXT: addi 0, 27, 1 +; PPC64BE-NEXT: stb 0, 12(3) +; PPC64BE-NEXT: addi 0, 26, 1 +; PPC64BE-NEXT: addi 12, 12, 1 +; PPC64BE-NEXT: stb 0, 11(3) +; PPC64BE-NEXT: addi 0, 25, 1 +; PPC64BE-NEXT: stb 12, 9(3) +; PPC64BE-NEXT: addi 12, 2, 1 +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 +; PPC64BE-NEXT: stb 0, 10(3) +; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) ; PPC64BE-NEXT: stb 8, 4(3) ; PPC64BE-NEXT: stb 7, 3(3) ; PPC64BE-NEXT: stb 6, 2(3) ; PPC64BE-NEXT: stb 5, 1(3) -; PPC64BE-NEXT: stb 25, 15(3) -; PPC64BE-NEXT: stb 12, 14(3) -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: stb 30, 12(3) -; PPC64BE-NEXT: stb 29, 11(3) -; PPC64BE-NEXT: stb 28, 10(3) -; PPC64BE-NEXT: stb 27, 9(3) -; PPC64BE-NEXT: stb 26, 8(3) -; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 4, 0(3) +; PPC64BE-NEXT: ld 2, -96(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload @@ -248,6 +256,8 @@ ; PPC64BE-NEXT: ld 25, -56(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 24, -64(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; PPC64BE-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; PPC64BE-NEXT: ld 21, -88(1) # 8-byte Folded Reload ; PPC64BE-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i8: @@ -269,30 +279,30 @@ ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 70(1) -; PPC32-NEXT: lhz 12, 66(1) -; PPC32-NEXT: lhz 0, 62(1) -; PPC32-NEXT: nor 10, 10, 10 -; PPC32-NEXT: lhz 30, 58(1) -; PPC32-NEXT: lhz 29, 54(1) -; PPC32-NEXT: lhz 28, 50(1) -; PPC32-NEXT: lhz 27, 46(1) -; PPC32-NEXT: lhz 26, 42(1) -; PPC32-NEXT: nor 9, 9, 9 -; PPC32-NEXT: nor 8, 8, 8 -; PPC32-NEXT: nor 7, 7, 7 -; PPC32-NEXT: nor 6, 6, 6 -; PPC32-NEXT: nor 5, 5, 5 -; PPC32-NEXT: nor 4, 4, 4 -; PPC32-NEXT: nor 3, 3, 3 -; PPC32-NEXT: subf 3, 3, 26 -; PPC32-NEXT: subf 4, 4, 27 -; PPC32-NEXT: subf 5, 5, 28 -; PPC32-NEXT: subf 6, 6, 29 -; PPC32-NEXT: subf 7, 7, 30 -; PPC32-NEXT: subf 8, 8, 0 -; PPC32-NEXT: subf 9, 9, 12 -; PPC32-NEXT: subf 10, 10, 11 +; PPC32-NEXT: lhz 11, 50(1) +; PPC32-NEXT: lhz 12, 46(1) +; PPC32-NEXT: lhz 0, 42(1) +; PPC32-NEXT: lhz 30, 70(1) +; PPC32-NEXT: lhz 29, 66(1) +; PPC32-NEXT: lhz 28, 62(1) +; 
PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 26, 54(1) +; PPC32-NEXT: add 3, 0, 3 +; PPC32-NEXT: add 4, 12, 4 +; PPC32-NEXT: add 5, 11, 5 +; PPC32-NEXT: add 6, 26, 6 +; PPC32-NEXT: add 7, 27, 7 +; PPC32-NEXT: add 8, 28, 8 +; PPC32-NEXT: add 9, 29, 9 +; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: addi 4, 4, 1 +; PPC32-NEXT: addi 5, 5, 1 +; PPC32-NEXT: addi 6, 6, 1 +; PPC32-NEXT: addi 7, 7, 1 +; PPC32-NEXT: addi 8, 8, 1 +; PPC32-NEXT: addi 9, 9, 1 +; PPC32-NEXT: addi 10, 10, 1 ; PPC32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload @@ -304,43 +314,43 @@ ; PPC64BE-LABEL: vector_i128_i16: ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 25, 118(1) ; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 182(1) -; PPC64BE-NEXT: lhz 12, 174(1) -; PPC64BE-NEXT: lhz 0, 166(1) -; PPC64BE-NEXT: nor 10, 10, 10 -; PPC64BE-NEXT: lhz 30, 158(1) -; PPC64BE-NEXT: lhz 29, 150(1) -; PPC64BE-NEXT: lhz 28, 142(1) -; PPC64BE-NEXT: lhz 27, 134(1) -; PPC64BE-NEXT: lhz 26, 126(1) -; PPC64BE-NEXT: nor 9, 9, 9 -; PPC64BE-NEXT: nor 8, 8, 8 -; PPC64BE-NEXT: nor 7, 7, 7 -; PPC64BE-NEXT: nor 6, 6, 6 -; PPC64BE-NEXT: nor 5, 5, 5 -; PPC64BE-NEXT: nor 4, 4, 4 -; PPC64BE-NEXT: nor 25, 25, 25 -; PPC64BE-NEXT: subf 4, 4, 26 -; PPC64BE-NEXT: subf 5, 5, 27 -; PPC64BE-NEXT: subf 6, 6, 28 -; PPC64BE-NEXT: subf 7, 7, 29 -; PPC64BE-NEXT: subf 8, 8, 30 -; PPC64BE-NEXT: subf 9, 9, 0 -; PPC64BE-NEXT: subf 10, 10, 12 -; PPC64BE-NEXT: subf 11, 25, 11 +; PPC64BE-NEXT: lhz 11, 142(1) +; PPC64BE-NEXT: lhz 12, 134(1) +; PPC64BE-NEXT: lhz 0, 126(1) +; PPC64BE-NEXT: lhz 30, 118(1) +; PPC64BE-NEXT: lhz 29, 182(1) +; PPC64BE-NEXT: lhz 28, 174(1) +; PPC64BE-NEXT: lhz 27, 166(1) +; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 25, 150(1) +; PPC64BE-NEXT: add 4, 0, 4 +; PPC64BE-NEXT: add 5, 12, 5 +; PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 7, 25, 7 +; PPC64BE-NEXT: add 8, 26, 8 +; PPC64BE-NEXT: add 9, 27, 9 +; PPC64BE-NEXT: add 10, 28, 10 +; PPC64BE-NEXT: add 11, 29, 30 +; PPC64BE-NEXT: addi 4, 4, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) ; PPC64BE-NEXT: sth 8, 8(3) ; PPC64BE-NEXT: sth 7, 6(3) ; PPC64BE-NEXT: sth 6, 4(3) ; PPC64BE-NEXT: sth 5, 2(3) -; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 4, 0(3) ; PPC64BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PPC64BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload @@ -363,26 +373,26 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; PPC32-LABEL: vector_i128_i32: ; PPC32: # %bb.0: -; PPC32-NEXT: nor 6, 6, 6 -; PPC32-NEXT: nor 5, 5, 5 -; PPC32-NEXT: nor 4, 4, 4 -; PPC32-NEXT: nor 3, 3, 3 -; PPC32-NEXT: subf 3, 3, 7 -; PPC32-NEXT: subf 4, 4, 8 -; PPC32-NEXT: subf 5, 5, 9 -; PPC32-NEXT: subf 6, 6, 10 +; PPC32-NEXT: add 3, 7, 3 +; PPC32-NEXT: add 4, 8, 4 +; PPC32-NEXT: add 5, 9, 5 +; PPC32-NEXT: add 6, 10, 6 +; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: addi 4, 4, 1 +; PPC32-NEXT: addi 5, 5, 1 +; PPC32-NEXT: addi 6, 6, 
1 ; PPC32-NEXT: blr ; ; PPC64BE-LABEL: vector_i128_i32: ; PPC64BE: # %bb.0: -; PPC64BE-NEXT: nor 3, 3, 3 -; PPC64BE-NEXT: nor 4, 4, 4 -; PPC64BE-NEXT: nor 5, 5, 5 -; PPC64BE-NEXT: nor 6, 6, 6 -; PPC64BE-NEXT: subf 6, 6, 10 -; PPC64BE-NEXT: subf 5, 5, 9 -; PPC64BE-NEXT: subf 4, 4, 8 -; PPC64BE-NEXT: subf 3, 3, 7 +; PPC64BE-NEXT: add 6, 10, 6 +; PPC64BE-NEXT: add 5, 9, 5 +; PPC64BE-NEXT: add 4, 8, 4 +; PPC64BE-NEXT: add 3, 7, 3 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 +; PPC64BE-NEXT: addi 3, 3, 1 ; PPC64BE-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i32: @@ -410,10 +420,10 @@ ; ; PPC64BE-LABEL: vector_i128_i64: ; PPC64BE: # %bb.0: -; PPC64BE-NEXT: not 4, 4 -; PPC64BE-NEXT: not 3, 3 -; PPC64BE-NEXT: sub 3, 5, 3 -; PPC64BE-NEXT: sub 4, 6, 4 +; PPC64BE-NEXT: add 3, 5, 3 +; PPC64BE-NEXT: add 4, 6, 4 +; PPC64BE-NEXT: addi 3, 3, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: blr ; ; PPC64LE-LABEL: vector_i128_i64: Index: test/CodeGen/X86/sub-of-not.ll =================================================================== --- test/CodeGen/X86/sub-of-not.ll +++ test/CodeGen/X86/sub-of-not.ll @@ -13,16 +13,16 @@ ; X86-LABEL: scalar_i8: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: notb %cl -; X86-NEXT: subb %cl, %al +; X86-NEXT: addb {{[0-9]+}}(%esp), %al +; X86-NEXT: incb %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notb %dil -; X64-NEXT: subb %dil, %al +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rsi,%rdi), %eax +; X64-NEXT: incb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = xor i8 %x, -1 @@ -33,18 +33,17 @@ define i16 @scalar_i16(i16 %x, i16 %y) nounwind { ; X86-LABEL: scalar_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addw {{[0-9]+}}(%esp), %ax +; X86-NEXT: incl %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: scalar_i16: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 1(%rsi,%rdi), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = xor i16 %x, -1 @@ -53,19 +52,26 @@ } define i32 @scalar_i32(i32 %x, i32 %y) nounwind { -; X86-LABEL: scalar_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: retl +; X86-SLOWLEA-LABEL: scalar_i32: +; X86-SLOWLEA: # %bb.0: +; X86-SLOWLEA-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOWLEA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOWLEA-NEXT: addl %ecx, %eax +; X86-SLOWLEA-NEXT: addl $1, %eax +; X86-SLOWLEA-NEXT: retl +; +; X86-FASTLEA-LABEL: scalar_i32: +; X86-FASTLEA: # %bb.0: +; X86-FASTLEA-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FASTLEA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FASTLEA-NEXT: leal 1(%ecx,%eax), %eax +; X86-FASTLEA-NEXT: retl ; ; X64-LABEL: scalar_i32: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal 
1(%rsi,%rdi), %eax ; X64-NEXT: retq %t0 = xor i32 %x, -1 %t1 = sub i32 %y, %t0 @@ -75,23 +81,17 @@ define i64 @scalar_i64(i64 %x, i64 %y) nounwind { ; X86-LABEL: scalar_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: notl %esi -; X86-NEXT: notl %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: popl %esi +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl $1, %eax +; X86-NEXT: adcl $0, %edx ; X86-NEXT: retl ; ; X64-LABEL: scalar_i64: ; X64: # %bb.0: -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rdi -; X64-NEXT: subq %rdi, %rax +; X64-NEXT: leaq 1(%rsi,%rdi), %rax ; X64-NEXT: retq %t0 = xor i64 %x, -1 %t1 = sub i64 %y, %t0 @@ -101,10 +101,9 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; ALL-LABEL: vector_i128_i8: ; ALL: # %bb.0: -; ALL-NEXT: pcmpeqd %xmm2, %xmm2 -; ALL-NEXT: pxor %xmm0, %xmm2 -; ALL-NEXT: psubb %xmm2, %xmm1 -; ALL-NEXT: movdqa %xmm1, %xmm0 +; ALL-NEXT: paddb %xmm1, %xmm0 +; ALL-NEXT: pcmpeqd %xmm1, %xmm1 +; ALL-NEXT: psubb %xmm1, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %t0 = xor <16 x i8> %x, %t1 = sub <16 x i8> %y, %t0 @@ -114,10 +113,9 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; ALL-LABEL: vector_i128_i16: ; ALL: # %bb.0: -; ALL-NEXT: pcmpeqd %xmm2, %xmm2 -; ALL-NEXT: pxor %xmm0, %xmm2 -; ALL-NEXT: psubw %xmm2, %xmm1 -; ALL-NEXT: movdqa %xmm1, %xmm0 +; ALL-NEXT: paddw %xmm1, %xmm0 +; ALL-NEXT: pcmpeqd %xmm1, %xmm1 +; ALL-NEXT: psubw %xmm1, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %t0 = xor <8 x i16> %x, %t1 = sub <8 x i16> %y, %t0 @@ -127,10 +125,9 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; ALL-LABEL: vector_i128_i32: ; ALL: # %bb.0: -; ALL-NEXT: pcmpeqd %xmm2, %xmm2 -; ALL-NEXT: pxor %xmm0, %xmm2 -; ALL-NEXT: psubd %xmm2, %xmm1 -; ALL-NEXT: movdqa %xmm1, %xmm0 +; ALL-NEXT: paddd %xmm1, %xmm0 +; ALL-NEXT: pcmpeqd %xmm1, %xmm1 +; ALL-NEXT: psubd %xmm1, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %t0 = xor <4 x i32> %x, %t1 = sub <4 x i32> %y, %t0 @@ -140,10 +137,9 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; ALL-LABEL: vector_i128_i64: ; ALL: # %bb.0: -; ALL-NEXT: pcmpeqd %xmm2, %xmm2 -; ALL-NEXT: pxor %xmm0, %xmm2 -; ALL-NEXT: psubq %xmm2, %xmm1 -; ALL-NEXT: movdqa %xmm1, %xmm0 +; ALL-NEXT: paddq %xmm1, %xmm0 +; ALL-NEXT: pcmpeqd %xmm1, %xmm1 +; ALL-NEXT: psubq %xmm1, %xmm0 ; ALL-NEXT: ret{{[l|q]}} %t0 = xor <2 x i64> %x, %t1 = sub <2 x i64> %y, %t0
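
Note on the test deltas above: every rewrite relies on the same two's-complement identity, sub %y, (xor %x, -1) == add (add %x, %y), 1, which holds because xor %x, -1 == -%x - 1 modulo 2^n. Below is a minimal standalone C++ sketch (not part of the patch; the file layout and names are illustrative only) that exhaustively verifies the identity for all 8-bit inputs:

#include <cstdint>
#include <cstdio>

// Exhaustively check that  y - (x ^ 0xFF) == (x + y) + 1  over uint8_t,
// i.e. that sub-of-not and inc-of-add agree modulo 2^8.
int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned Y = 0; Y < 256; ++Y) {
      // sub %y, (xor %x, -1): truncate ~x to 8 bits, then subtract.
      uint8_t SubOfNot = static_cast<uint8_t>(Y - static_cast<uint8_t>(~X));
      // add (add %x, %y), 1: wrap-around addition, then increment.
      uint8_t IncOfAdd = static_cast<uint8_t>(X + Y + 1);
      if (SubOfNot != IncOfAdd) {
        std::printf("mismatch at x=%u y=%u\n", X, Y);
        return 1;
      }
    }
  }
  std::puts("sub-of-not == inc-of-add for all 8-bit x, y");
  return 0;
}

This is also why the X64 scalar checks can fold the whole computation into a single leaq 1(%rsi,%rdi), %rax, and why the new vector sequences replace the pxor-with-all-ones plus psub with a padd followed by a psub of the all-ones vector (subtracting -1 is the same as adding 1).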