Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -458,7 +458,9 @@ SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1); + SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); @@ -1000,53 +1002,50 @@ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, - SDValue N1, SDNodeFlags Flags) { +// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression +// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. +SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + + if (N0.getOpcode() != Opc) + return SDValue(); + // Don't reassociate reductions. - if (Flags.hasVectorReduction()) + if (N0->getFlags().hasVectorReduction()) return SDValue(); - EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) { - if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - return SDValue(); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one - // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); - if (!OpNode.getNode()) - return SDValue(); - AddToWorklist(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); } - } - - if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) { - if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { - // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + if (N0.hasOneUse()) { + // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) + // iff (op x, c1) has one use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) return SDValue(); - } - if (N1.hasOneUse()) { - // reassoc. 
(op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one - // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); - if (!OpNode.getNode()) - return SDValue(); - AddToWorklist(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); - } + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } + return SDValue(); +} +// Try to reassociate commutative binops. +SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1, SDNodeFlags Flags) { + assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); + // Don't reassociate reductions. + if (Flags.hasVectorReduction()) + return SDValue(); + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1)) + return Combined; + if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0)) + return Combined; return SDValue(); } @@ -2193,7 +2192,7 @@ return NewSel; // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) + if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) return RADD; // fold ((0-A) + B) -> B-A @@ -3275,7 +3274,7 @@ N0.getOperand(1), N1)); // reassociate mul - if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; return SDValue(); @@ -4799,7 +4798,7 @@ return NewSel; // reassociate and - if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; // Try to convert a constant mask AND into a shuffle clear mask. @@ -5525,7 +5524,7 @@ return BSwap; // reassociate or - if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -6412,7 +6411,7 @@ return NewSel; // reassociate xor - if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) + if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; // fold !(x cc y) -> (x !cc y) Index: llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -19,8 +19,8 @@ ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]] ; CHECK-NEXT: b.ne ; Next BB -; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]] -; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]] +; CHECK: add [[BLOCKBASE1:x[0-9]+]], [[I1]], [[BLOCKBASE]] +; CHECK-NEXT: add [[BLOCKBASE2:x[0-9]+]], [[I2]], [[BLOCKBASE]] ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1] ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1] ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]] Index: llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll +++ llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll @@ -184,14 +184,14 @@ ; VI: s_and_b32 s1, s0, 0xffff0000 ; VI: s_add_i32 s0, s0, 1 ; VI: s_and_b32 s0, s0, 0xffff -; VI: s_or_b32 s0, s0, s1 +; VI: s_or_b32 s0, s1, s0 ; VI: s_add_i32 s0, s0, 0x10000 ; VI: v_mov_b32_e32 v0, s0 ; SI: s_lshl_b32 s1, s1, 16 ; SI: 
s_add_i32 s0, s0, 1 ; SI: s_and_b32 s0, s0, 0xffff -; SI: s_or_b32 s0, s0, s1 +; SI: s_or_b32 s0, s1, s0 ; SI: s_add_i32 s0, s0, 0x10000 define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) { %add = add <2 x i16> %arg0, Index: llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -289,18 +289,18 @@ ; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1 ; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v6 -; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 ; SI-NEXT: v_and_b32_e32 v7, s12, v7 +; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_or_b32_e32 v1, v7, v6 +; SI-NEXT: v_or_b32_e32 v0, v6, v7 ; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; SI-NEXT: v_and_b32_e32 v0, s12, v4 -; SI-NEXT: v_or_b32_e32 v0, v0, v5 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x900, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; SI-NEXT: v_and_b32_e32 v1, s12, v4 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 +; SI-NEXT: v_or_b32_e32 v1, v5, v1 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -335,8 +335,8 @@ ; VI-NEXT: v_add_u16_e32 v9, 9, v5 ; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v7 -; VI-NEXT: v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; VI-NEXT: v_add_u16_e32 v0, s8, v0 ; VI-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v0, v1 Index: llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll +++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll @@ -71,7 +71,7 @@ ; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv: ; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13 ; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3 -; SI: s_add_i32 [[TMP:s[0-9]+]], s[[Y]], [[SHL3]] +; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]] ; SI: s_addk_i32 [[TMP]], 0x3d8 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]] ; SI: buffer_store_dword [[VRESULT]] Index: llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -216,7 +216,7 @@ ; SI-NEXT: s_add_i32 s0, s0, 12 ; SI-NEXT: s_or_b32 s0, s0, 4 ; SI-NEXT: s_and_b32 s0, s0, 0xff -; SI-NEXT: s_or_b32 s0, s0, s1 +; SI-NEXT: s_or_b32 s0, s1, s0 ; SI-NEXT: s_addk_i32 s0, 0x2c00 ; SI-NEXT: s_or_b32 s0, s0, 0x300 ; SI-NEXT: v_mov_b32_e32 v0, s0 Index: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll 
=================================================================== --- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll +++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll @@ -414,35 +414,35 @@ define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_short_int: ; ARM: @ %bb.0: @ %entry -; ARM-NEXT: ldrb r0, [r0] ; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: and r0, r1, r0 +; ARM-NEXT: ldrb r0, [r0] +; ARM-NEXT: and r0, r0, r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_and8_short_int: ; ARMEB: @ %bb.0: @ %entry -; ARMEB-NEXT: ldrb r0, [r0, #1] ; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: and r0, r1, r0 +; ARMEB-NEXT: ldrb r0, [r0, #1] +; ARMEB-NEXT: and r0, r0, r1 ; ARMEB-NEXT: clz r0, r0 ; ARMEB-NEXT: lsr r0, r0, #5 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_and8_short_int: ; THUMB1: @ %bb.0: @ %entry -; THUMB1-NEXT: ldrb r0, [r0] ; THUMB1-NEXT: ldrb r1, [r1] -; THUMB1-NEXT: ands r1, r0 -; THUMB1-NEXT: rsbs r0, r1, #0 -; THUMB1-NEXT: adcs r0, r1 +; THUMB1-NEXT: ldrb r2, [r0] +; THUMB1-NEXT: ands r2, r1 +; THUMB1-NEXT: rsbs r0, r2, #0 +; THUMB1-NEXT: adcs r0, r2 ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_and8_short_int: ; THUMB2: @ %bb.0: @ %entry -; THUMB2-NEXT: ldrb r0, [r0] ; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: ldrb r0, [r0] ; THUMB2-NEXT: ands r0, r1 ; THUMB2-NEXT: clz r0, r0 ; THUMB2-NEXT: lsrs r0, r0, #5 @@ -846,7 +846,7 @@ ; ARM-LABEL: test6: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] -; ARM-NEXT: and r0, r0, r1 +; ARM-NEXT: and r0, r1, r0 ; ARM-NEXT: uxtb r1, r2 ; ARM-NEXT: sub r0, r0, r1 ; ARM-NEXT: clz r0, r0 @@ -856,7 +856,7 @@ ; ARMEB-LABEL: test6: ; ARMEB: @ %bb.0: @ %entry ; ARMEB-NEXT: ldrb r0, [r0] -; ARMEB-NEXT: and r0, r0, r1 +; ARMEB-NEXT: and r0, r1, r0 ; ARMEB-NEXT: uxtb r1, r2 ; ARMEB-NEXT: sub r0, r0, r1 ; ARMEB-NEXT: clz r0, r0 @@ -893,7 +893,7 @@ ; ARM-LABEL: test7: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r0, [r0] -; ARM-NEXT: and r0, r0, r1 +; ARM-NEXT: and r0, r1, r0 ; ARM-NEXT: uxtb r1, r2 ; ARM-NEXT: sub r0, r0, r1 ; ARM-NEXT: clz r0, r0 @@ -903,7 +903,7 @@ ; ARMEB-LABEL: test7: ; ARMEB: @ %bb.0: @ %entry ; ARMEB-NEXT: ldrb r0, [r0, #1] -; ARMEB-NEXT: and r0, r0, r1 +; ARMEB-NEXT: and r0, r1, r0 ; ARMEB-NEXT: uxtb r1, r2 ; ARMEB-NEXT: sub r0, r0, r1 ; ARMEB-NEXT: clz r0, r0 @@ -1550,34 +1550,34 @@ ret i64 %and } +define void @test27(i32* nocapture %ptr) { ; ARM-LABEL: test27: -; ARM: @ %bb.0: +; ARM: @ %bb.0: @ %entry ; ARM-NEXT: ldrb r1, [r0, #1] ; ARM-NEXT: lsl r1, r1, #16 ; ARM-NEXT: str r1, [r0] ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test27: -; ARMEB: @ %bb.0: -; ARMEB-NEXT: ldrb r1, [r0, #2] -; ARMEB-NEXT: lsl r1, r1, #16 -; ARMEB-NEXT: str r1, [r0] -; ARMEB-NEXT: bx lr +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r1, [r0, #2] +; ARMEB-NEXT: lsl r1, r1, #16 +; ARMEB-NEXT: str r1, [r0] +; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test27: -; THUMB1: @ %bb.0: -; THUMB1-NEXT: ldrb r1, [r0, #1] -; THUMB1-NEXT: lsls r1, r1, #16 -; THUMB1-NEXT: str r1, [r0] -; THUMB1-NEXT: bx lr +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r1, [r0, #1] +; THUMB1-NEXT: lsls r1, r1, #16 +; THUMB1-NEXT: str r1, [r0] +; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test27: -; THUMB2: @ %bb.0: +; THUMB2: @ %bb.0: @ %entry ; THUMB2-NEXT: ldrb r1, [r0, #1] ; THUMB2-NEXT: lsls r1, r1, #16 ; THUMB2-NEXT: str r1, [r0] ; THUMB2-NEXT: bx lr -define void @test27(i32* nocapture %ptr) { entry: %0 = load i32, i32* %ptr, align 4 %and = and i32 %0, 65280 Index: 
llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll +++ llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll @@ -528,7 +528,7 @@ ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24) define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: -; CHECK: add r0, r0, r1 +; CHECK: add r0, r1, r0 ; CHECK-NEXT: mov r1, #65280 ; CHECK-NEXT: mov r2, #16711680 ; CHECK-NEXT: ldr r0, [r0, #13] @@ -540,7 +540,7 @@ ; CHECK-NEXT: mov pc, lr ; ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2: -; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6: add r0, r1, r0 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13] ; CHECK-ARMv6-NEXT: rev r0, r0 ; CHECK-ARMv6-NEXT: bx lr Index: llvm/trunk/test/CodeGen/ARM/load-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/load-combine.ll +++ llvm/trunk/test/CodeGen/ARM/load-combine.ll @@ -479,12 +479,12 @@ ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24) define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: -; CHECK: add r0, r0, r1 +; CHECK: add r0, r1, r0 ; CHECK-NEXT: ldr r0, [r0, #13] ; CHECK-NEXT: mov pc, lr ; ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2: -; CHECK-ARMv6: add r0, r0, r1 +; CHECK-ARMv6: add r0, r1, r0 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13] ; CHECK-ARMv6-NEXT: bx lr %tmp = add nuw nsw i32 %i, 4 Index: llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll +++ llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: vn %v0, %v0, %v0 ; CHECK-NEXT: vno %v2, %v2, %v2 ; CHECK-NEXT: vceqg %v0, %v0, %v1 -; CHECK-NEXT: vx %v0, %v2, %v0 +; CHECK-NEXT: vx %v0, %v0, %v2 ; CHECK-NEXT: vnc %v0, %v2, %v0 ; CHECK-NEXT: vlgvf %r0, %v0, 1 ; CHECK-NEXT: tmll %r0, 1 Index: llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll +++ llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll @@ -17,16 +17,16 @@ ; CHECK-V6M-NEXT: adds r0, r1, r0 ; CHECK-V6M-NEXT: bx lr ; CHECK-V6M-NEXT: .LBB0_5: -; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: adds r0, r0, r1 ; CHECK-V6M-NEXT: adds r0, r0, #4 ; CHECK-V6M-NEXT: .LBB0_6: ; CHECK-V6M-NEXT: bx lr ; CHECK-V6M-NEXT: .LBB0_7: -; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: adds r0, r0, r1 ; CHECK-V6M-NEXT: adds r0, r0, #1 ; CHECK-V6M-NEXT: bx lr ; CHECK-V6M-NEXT: .LBB0_8: -; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: adds r0, r0, r1 ; CHECK-V6M-NEXT: adds r0, r0, #2 ; CHECK-V6M-NEXT: bx lr ; CHECK-V6M-NEXT: .p2align 2 Index: llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll +++ llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll @@ -165,10 +165,10 @@ ; NON-PIC-NEXT: i32.load $push4=, 0($pop3){{$}} ; NON-PIC-NEXT: return $pop4{{$}} -; PIC-NEXT: global.get $push2=, g@GOT{{$}} ; PIC-NEXT: i32.const $push0=, 2{{$}} ; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}} -; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}} +; PIC-NEXT: global.get $push2=, g@GOT{{$}} +; 
PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}} ; PIC-NEXT: i32.const $push4=, -40{{$}} ; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}} ; PIC-NEXT: i32.load $push6=, 0($pop5){{$}} @@ -206,7 +206,7 @@ ; CHECK-NEXT: .functype load_test12 (i32, i32) -> (i32){{$}} ; CHECK-NEXT: i32.const $push0=, 2{{$}} ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} ; CHECK-NEXT: i32.const $push3=, 40{{$}} ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} @@ -222,7 +222,7 @@ ; CHECK-NEXT: .functype load_test13 (i32, i32) -> (i32){{$}} ; CHECK-NEXT: i32.const $push0=, 2{{$}} ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} ; CHECK-NEXT: i32.const $push3=, 40{{$}} ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} @@ -284,7 +284,7 @@ ; CHECK-NEXT: .functype load_test17 (i32, i32) -> (i32){{$}} ; CHECK-NEXT: i32.const $push0=, 2{{$}} ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} ; CHECK-NEXT: i32.const $push3=, 40{{$}} ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} @@ -314,7 +314,7 @@ ; CHECK-NEXT: .functype load_test19 (i32, i32) -> (i32){{$}} ; CHECK-NEXT: i32.const $push0=, 2{{$}} ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} ; CHECK-NEXT: i32.const $push3=, 40{{$}} ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} @@ -342,7 +342,7 @@ ; CHECK-NEXT: .functype load_test21 (i32, i32) -> (i32){{$}} ; CHECK-NEXT: i32.const $push0=, 2{{$}} ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}} +; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}} ; CHECK-NEXT: i32.const $push3=, -40{{$}} ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}} @@ -501,10 +501,10 @@ ; NON-PIC-NEXT: i32.const $push2=, g-40{{$}} ; NON-PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}} ; NON-PIC-NEXT: i32.store 0($pop3), $1{{$}} -; PIC-NEXT: global.get $push2=, g@GOT{{$}} ; PIC-NEXT: i32.const $push0=, 2{{$}} ; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}} -; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}} +; PIC-NEXT: global.get $push2=, g@GOT{{$}} +; PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}} ; PIC-NEXT: i32.const $push4=, -40{{$}} ; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}} ; PIC-NEXT: i32.store 0($pop5), $1{{$}} @@ -542,7 +542,7 @@ ; CHECK-NEXT: .functype store_test12 (i32, i32, i32) -> (){{$}} ; NON-PIC-NEXT: i32.const $push0=, 2{{$}} ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}} +; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}} ; NON-PIC-NEXT: i32.const $push3=, 40{{$}} ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}} @@ -558,7 +558,7 @@ ; CHECK-NEXT: .functype store_test13 (i32, i32, i32) -> (){{$}} ; NON-PIC-NEXT: i32.const $push0=, 2{{$}} ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}} +; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}} ; NON-PIC-NEXT: i32.const $push3=, 40{{$}} ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}} @@ -620,7 +620,7 
@@ ; CHECK-NEXT: .functype store_test17 (i32, i32, i32) -> (){{$}} ; NON-PIC-NEXT: i32.const $push0=, 2{{$}} ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}} +; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}} ; NON-PIC-NEXT: i32.const $push3=, 40{{$}} ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}} @@ -650,7 +650,7 @@ ; CHECK-NEXT: .functype store_test19 (i32, i32, i32) -> (){{$}} ; NON-PIC-NEXT: i32.const $push0=, 2{{$}} ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}} +; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}} ; NON-PIC-NEXT: i32.const $push3=, 40{{$}} ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}} @@ -678,7 +678,7 @@ ; CHECK-NEXT: .functype store_test21 (i32, i32, i32) -> (){{$}} ; NON-PIC-NEXT: i32.const $push0=, 2{{$}} ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}} -; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}} +; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}} ; NON-PIC-NEXT: i32.const $push3=, -40{{$}} ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}} ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}} Index: llvm/trunk/test/CodeGen/X86/add-ext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/add-ext.ll +++ llvm/trunk/test/CodeGen/X86/add-ext.ll @@ -26,7 +26,7 @@ ; CHECK-LABEL: add_nsw_sext_add: ; CHECK: # %bb.0: ; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: leaq 5(%rsi,%rax), %rax +; CHECK-NEXT: leaq 5(%rax,%rsi), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -73,7 +73,7 @@ ; CHECK-LABEL: gep8: ; CHECK: # %bb.0: ; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: leaq 5(%rsi,%rax), %rax +; CHECK-NEXT: leaq 5(%rax,%rsi), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -128,7 +128,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movslq %edi, %rax ; CHECK-NEXT: shlq $4, %rax -; CHECK-NEXT: leaq 80(%rsi,%rax), %rax +; CHECK-NEXT: leaq 80(%rax,%rsi), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -169,12 +169,13 @@ ; The same as @PR20134 but sign extension is replaced with zero extension define void @PR20134_zext(i32* %a, i32 %i) { -; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx -; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx -; CHECK-NEXT: movl %ecx, (%rdi,%rax,4) -; CHECK-NEXT: retq +; CHECK-LABEL: PR20134_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx +; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx +; CHECK-NEXT: movl %ecx, (%rdi,%rax,4) +; CHECK-NEXT: retq %add1 = add nuw i32 %i, 1 %idx1 = zext i32 %add1 to i64 Index: llvm/trunk/test/CodeGen/X86/combine-multiplies.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-multiplies.ll +++ llvm/trunk/test/CodeGen/X86/combine-multiplies.ll @@ -38,10 +38,10 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: imull $400, %ecx, %edx # imm = 0x190 -; CHECK-NEXT: leal (%eax,%edx), %esi +; CHECK-NEXT: leal (%edx,%eax), %esi ; CHECK-NEXT: movl $11, 2020(%esi,%ecx,4) -; CHECK-NEXT: movl $22, 2080(%eax,%edx) -; CHECK-NEXT: movl $33, 10080(%eax,%edx) +; CHECK-NEXT: movl $22, 2080(%edx,%eax) +; CHECK-NEXT: movl $33, 10080(%edx,%eax) ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl entry: Index: llvm/trunk/test/CodeGen/X86/load-combine.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/load-combine.ll +++ llvm/trunk/test/CodeGen/X86/load-combine.ll @@ -966,7 +966,7 @@ ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %esi, %eax -; CHECK64-NEXT: movl 13(%rdi,%rax), %eax +; CHECK64-NEXT: movl 13(%rax,%rdi), %eax ; CHECK64-NEXT: retq %tmp = add nuw nsw i32 %i, 4 %tmp2 = add nuw nsw i32 %i, 3 @@ -1016,7 +1016,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 12(%eax,%ecx), %eax +; CHECK-NEXT: movl 12(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_zaext_loads: @@ -1072,7 +1072,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl 12(%eax,%ecx), %eax +; CHECK-NEXT: movl 12(%ecx,%eax), %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: load_i32_by_i8_zsext_loads: Index: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -66,7 +66,7 @@ ; GENERIC-NEXT: movzbl 2(%r8,%rbx,4), %ebx ; GENERIC-NEXT: shll $16, %ebx ; GENERIC-NEXT: orl %eax, %ebx -; GENERIC-NEXT: xorl 16(%rdx,%rcx), %ebx +; GENERIC-NEXT: xorl 16(%rcx,%rdx), %ebx ; GENERIC-NEXT: shrl $8, %edi ; GENERIC-NEXT: movzbl 3(%r9,%rdi,4), %eax ; GENERIC-NEXT: shll $24, %eax @@ -74,7 +74,7 @@ ; GENERIC-NEXT: movzbl 2(%r8,%rdi,4), %edi ; GENERIC-NEXT: shll $16, %edi ; GENERIC-NEXT: orl %eax, %edi -; GENERIC-NEXT: xorl 20(%rdx,%rcx), %edi +; GENERIC-NEXT: xorl 20(%rcx,%rdx), %edi ; GENERIC-NEXT: movl %ebx, %eax ; GENERIC-NEXT: shrl $24, %eax ; GENERIC-NEXT: movb %al, (%rsi) @@ -156,8 +156,8 @@ ; ATOM-NEXT: shll $16, %eax ; ATOM-NEXT: orl %edi, %ebp ; ATOM-NEXT: orl %r15d, %eax -; ATOM-NEXT: xorl 20(%rdx,%rcx), %ebp -; ATOM-NEXT: xorl 16(%rdx,%rcx), %eax +; ATOM-NEXT: xorl 20(%rcx,%rdx), %ebp +; ATOM-NEXT: xorl 16(%rcx,%rdx), %eax ; ATOM-NEXT: movl %eax, %edi ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $24, %edi Index: llvm/trunk/test/CodeGen/X86/merge_store.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/merge_store.ll +++ llvm/trunk/test/CodeGen/X86/merge_store.ll @@ -44,7 +44,7 @@ define void @indexed_store_merge(i64 %p, i8* %v) { ; CHECK-LABEL: indexed_store_merge: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $0, 2(%rsi,%rdi) +; CHECK-NEXT: movl $0, 2(%rdi,%rsi) ; CHECK-NEXT: movb $0, (%rsi) ; CHECK-NEXT: retq entry: Index: llvm/trunk/test/CodeGen/X86/sad.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sad.ll +++ llvm/trunk/test/CodeGen/X86/sad.ll @@ -1403,18 +1403,18 @@ ; SSE2-NEXT: movdqu (%rdi), %xmm0 ; SSE2-NEXT: movdqu (%rsi), %xmm1 ; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: movdqu (%rdx), %xmm0 +; SSE2-NEXT: movdqu (%rcx), %xmm2 +; SSE2-NEXT: psadbw %xmm0, %xmm2 ; SSE2-NEXT: movl $1, %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: movdqu (%rdx), %xmm1 -; SSE2-NEXT: movdqu (%rcx), %xmm2 -; SSE2-NEXT: psadbw %xmm1, %xmm2 -; SSE2-NEXT: paddd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] ; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,1,2,3] +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; AVX1-LABEL: sad_unroll_nonzero_initial: @@ -1425,8 +1425,8 @@ ; AVX1-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 ; AVX1-NEXT: movl $1, %eax ; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -1438,12 +1438,12 @@ ; AVX2: # %bb.0: # %bb ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 -; AVX2-NEXT: movl $1, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovdqu (%rdx), %xmm1 ; AVX2-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 -; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: movl $1, %eax +; AVX2-NEXT: vmovd %eax, %xmm2 +; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -1458,12 +1458,12 @@ ; AVX512: # %bb.0: # %bb ; AVX512-NEXT: vmovdqu (%rdi), %xmm0 ; AVX512-NEXT: vpsadbw (%rsi), %xmm0, %xmm0 -; AVX512-NEXT: movl $1, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 -; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vmovdqu (%rdx), %xmm1 ; AVX512-NEXT: vpsadbw (%rcx), %xmm1, %xmm1 -; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: movl $1, %eax +; AVX512-NEXT: vmovd %eax, %xmm2 +; AVX512-NEXT: vpaddd %zmm2, %zmm1, %zmm1 +; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 Index: llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll +++ llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll @@ -146,7 +146,7 @@ ; ; AVX2-LABEL: zext_and_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vandps %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq %xz = zext <8 x i8> %x to <8 x i16>
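
Editor's note on the change above: the patch replaces the old two-sided ReassociateOps with a one-sided helper, reassociateOpsCommutative, plus a thin driver, reassociateOps, that asserts the opcode is commutative and simply tries the helper with (N0, N1) and then with (N1, N0). The two rewrites the helper applies are the ones named in its comments: (op (op x, c1), c2) -> (op x, (op c1, c2)), and (op (op x, c1), y) -> (op (op x, y), c1) when (op x, c1) has a single use. Below is a minimal standalone sketch of that control flow on a toy expression type; Expr, cst, var, add, and the always-successful integer fold are illustrative stand-ins invented for this note, not SDNode/SelectionDAG APIs.

#include <iostream>
#include <memory>

// Toy expression node standing in for SDNode; 'op' is 0 for leaves.
struct Expr {
  char op = 0;
  int leaf = 0;
  bool isConst = false;
  std::shared_ptr<Expr> lhs, rhs;
  int uses = 1; // modeled hasOneUse()
};
using E = std::shared_ptr<Expr>;

static E cst(int v) { auto e = std::make_shared<Expr>(); e->isConst = true; e->leaf = v; return e; }
static E var() { return std::make_shared<Expr>(); }
static E add(E a, E b) { auto e = std::make_shared<Expr>(); e->op = '+'; e->lhs = a; e->rhs = b; return e; }

// Mirrors reassociateOpsCommutative: only fires when n0 is itself an opc node.
// (This sketch models only '+' and folds constants unconditionally.)
static E reassocCommutative(char opc, E n0, E n1) {
  if (n0->op != opc)
    return nullptr;
  if (n0->rhs->isConst) {
    if (n1->isConst) // (op (op x, c1), c2) -> (op x, (op c1, c2))
      return add(n0->lhs, cst(n0->rhs->leaf + n1->leaf));
    if (n0->uses == 1) // (op (op x, c1), y) -> (op (op x, y), c1)
      return add(add(n0->lhs, n1), n0->rhs);
  }
  return nullptr;
}

// Mirrors reassociateOps: commutativity is handled by trying both orders,
// which replaces the duplicated N1-side block the patch deletes.
static E reassoc(char opc, E n0, E n1) {
  if (E r = reassocCommutative(opc, n0, n1)) return r;
  return reassocCommutative(opc, n1, n0);
}

int main() {
  E x = var();
  E a = reassoc('+', add(x, cst(3)), cst(4)); // (x + 3) + 4 -> x + 7
  E b = reassoc('+', cst(4), add(x, cst(3))); // 4 + (x + 3) -> x + 7, via the swap
  std::cout << (a && a->rhs->isConst && a->rhs->leaf == 7) << " "
            << (b && b->rhs->isConst && b->rhs->leaf == 7) << "\n"; // prints "1 1"
}

Because the driver now reaches swapped forms through the second call rather than dedicated code, the combine can produce the result node with its operands in the other order; that appears to be why the regenerated CHECK lines in the tests above (e.g. "and r0, r1, r0", "s_or_b32 s0, s1, s0", "leaq 5(%rax,%rsi)") differ almost entirely in operand order rather than in instruction count.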