diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1920,8 +1920,8 @@ DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); - SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = + DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs, MemVTs; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -101,56 +101,56 @@ define fastcc <128 x i32> @ret_split_v128i32(<128 x i32>* %x) { ; LMULMAX8-LABEL: ret_split_v128i32: ; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; LMULMAX8-NEXT: vle32.v v8, (a1) ; LMULMAX8-NEXT: addi a2, a1, 128 -; LMULMAX8-NEXT: vle32.v v16, (a2) -; LMULMAX8-NEXT: addi a2, a1, 384 -; LMULMAX8-NEXT: vle32.v v24, (a2) -; LMULMAX8-NEXT: addi a1, a1, 256 -; LMULMAX8-NEXT: vle32.v v0, (a1) +; LMULMAX8-NEXT: li a3, 32 +; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a2) +; LMULMAX8-NEXT: addi a2, a1, 256 +; LMULMAX8-NEXT: vle32.v v16, (a1) +; LMULMAX8-NEXT: addi a1, a1, 384 +; LMULMAX8-NEXT: vle32.v v24, (a1) +; LMULMAX8-NEXT: vle32.v v0, (a2) +; LMULMAX8-NEXT: vse32.v v16, (a0) ; LMULMAX8-NEXT: addi a1, a0, 384 ; LMULMAX8-NEXT: vse32.v v24, (a1) ; LMULMAX8-NEXT: addi a1, a0, 256 ; LMULMAX8-NEXT: vse32.v v0, (a1) -; LMULMAX8-NEXT: addi a1, a0, 128 -; LMULMAX8-NEXT: vse32.v v16, (a1) +; LMULMAX8-NEXT: addi a0, a0, 128 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: ret_split_v128i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; LMULMAX4-NEXT: vle32.v v8, (a1) ; LMULMAX4-NEXT: addi a2, a1, 64 -; LMULMAX4-NEXT: vle32.v v12, (a2) +; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; LMULMAX4-NEXT: vle32.v v8, (a2) ; LMULMAX4-NEXT: addi a2, a1, 128 -; LMULMAX4-NEXT: vle32.v v16, (a2) +; LMULMAX4-NEXT: vle32.v v12, (a2) ; LMULMAX4-NEXT: addi a2, a1, 192 -; LMULMAX4-NEXT: vle32.v v20, (a2) +; LMULMAX4-NEXT: vle32.v v16, (a2) ; LMULMAX4-NEXT: addi a2, a1, 256 -; LMULMAX4-NEXT: vle32.v v24, (a2) +; LMULMAX4-NEXT: vle32.v v20, (a2) ; LMULMAX4-NEXT: addi a2, a1, 320 -; LMULMAX4-NEXT: vle32.v v28, (a2) -; LMULMAX4-NEXT: addi a2, a1, 448 -; LMULMAX4-NEXT: vle32.v v0, (a2) -; LMULMAX4-NEXT: addi a1, a1, 384 -; LMULMAX4-NEXT: vle32.v v4, (a1) +; LMULMAX4-NEXT: vle32.v v24, (a2) +; LMULMAX4-NEXT: addi a2, a1, 384 +; LMULMAX4-NEXT: vle32.v v28, (a1) +; LMULMAX4-NEXT: addi a1, a1, 448 +; LMULMAX4-NEXT: vle32.v v0, (a1) +; LMULMAX4-NEXT: vle32.v v4, (a2) +; LMULMAX4-NEXT: vse32.v v28, (a0) ; LMULMAX4-NEXT: addi a1, a0, 448 ; LMULMAX4-NEXT: vse32.v v0, (a1) ; LMULMAX4-NEXT: addi a1, a0, 384 ; LMULMAX4-NEXT: vse32.v v4, (a1) ; LMULMAX4-NEXT: addi a1, a0, 320 -; LMULMAX4-NEXT: vse32.v v28, (a1) -; LMULMAX4-NEXT: addi a1, a0, 256 ; LMULMAX4-NEXT: vse32.v v24, (a1) -; LMULMAX4-NEXT: addi a1, a0, 192 +; LMULMAX4-NEXT: addi a1, a0, 256 ; LMULMAX4-NEXT: vse32.v v20, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 +; LMULMAX4-NEXT: addi a1, a0, 192 ; LMULMAX4-NEXT: vse32.v v16, (a1) -; LMULMAX4-NEXT: addi a1, a0, 64 +; LMULMAX4-NEXT: addi a1, a0, 128 ; LMULMAX4-NEXT: vse32.v v12, (a1) +; LMULMAX4-NEXT: addi a0, a0, 64 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: ret %v = load <128 x i32>, <128 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -233,254 +233,254 @@ define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) { ; LMULMAX8-LABEL: ret_split_v128i32: ; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: li a2, 32 -; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; LMULMAX8-NEXT: vle32.v v8, (a1) ; LMULMAX8-NEXT: addi a2, a1, 128 -; LMULMAX8-NEXT: vle32.v v16, (a2) -; LMULMAX8-NEXT: addi a2, a1, 384 -; LMULMAX8-NEXT: vle32.v v24, (a2) -; LMULMAX8-NEXT: addi a1, a1, 256 -; LMULMAX8-NEXT: vle32.v v0, (a1) +; LMULMAX8-NEXT: li a3, 32 +; LMULMAX8-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; LMULMAX8-NEXT: vle32.v v8, (a2) +; LMULMAX8-NEXT: addi a2, a1, 256 +; LMULMAX8-NEXT: vle32.v v16, (a1) +; LMULMAX8-NEXT: addi a1, a1, 384 +; LMULMAX8-NEXT: vle32.v v24, (a1) +; LMULMAX8-NEXT: vle32.v v0, (a2) +; LMULMAX8-NEXT: vse32.v v16, (a0) ; LMULMAX8-NEXT: addi a1, a0, 384 ; LMULMAX8-NEXT: vse32.v v24, (a1) ; LMULMAX8-NEXT: addi a1, a0, 256 ; LMULMAX8-NEXT: vse32.v v0, (a1) -; LMULMAX8-NEXT: addi a1, a0, 128 -; LMULMAX8-NEXT: vse32.v v16, (a1) +; LMULMAX8-NEXT: addi a0, a0, 128 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: ret_split_v128i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; LMULMAX4-NEXT: vle32.v v8, (a1) ; LMULMAX4-NEXT: addi a2, a1, 64 -; LMULMAX4-NEXT: vle32.v v12, (a2) +; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; LMULMAX4-NEXT: vle32.v v8, (a2) ; LMULMAX4-NEXT: addi a2, a1, 128 -; LMULMAX4-NEXT: vle32.v v16, (a2) +; LMULMAX4-NEXT: vle32.v v12, (a2) ; LMULMAX4-NEXT: addi a2, a1, 192 -; LMULMAX4-NEXT: vle32.v v20, (a2) +; LMULMAX4-NEXT: vle32.v v16, (a2) ; LMULMAX4-NEXT: addi a2, a1, 256 -; LMULMAX4-NEXT: vle32.v v24, (a2) +; LMULMAX4-NEXT: vle32.v v20, (a2) ; LMULMAX4-NEXT: addi a2, a1, 320 -; LMULMAX4-NEXT: vle32.v v28, (a2) -; LMULMAX4-NEXT: addi a2, a1, 448 -; LMULMAX4-NEXT: vle32.v v0, (a2) -; LMULMAX4-NEXT: addi a1, a1, 384 -; LMULMAX4-NEXT: vle32.v v4, (a1) +; LMULMAX4-NEXT: vle32.v v24, (a2) +; LMULMAX4-NEXT: addi a2, a1, 384 +; LMULMAX4-NEXT: vle32.v v28, (a1) +; LMULMAX4-NEXT: addi a1, a1, 448 +; LMULMAX4-NEXT: vle32.v v0, (a1) +; LMULMAX4-NEXT: vle32.v v4, (a2) +; LMULMAX4-NEXT: vse32.v v28, (a0) ; LMULMAX4-NEXT: addi a1, a0, 448 ; LMULMAX4-NEXT: vse32.v v0, (a1) ; LMULMAX4-NEXT: addi a1, a0, 384 ; LMULMAX4-NEXT: vse32.v v4, (a1) ; LMULMAX4-NEXT: addi a1, a0, 320 -; LMULMAX4-NEXT: vse32.v v28, (a1) -; LMULMAX4-NEXT: addi a1, a0, 256 ; LMULMAX4-NEXT: vse32.v v24, (a1) -; LMULMAX4-NEXT: addi a1, a0, 192 +; LMULMAX4-NEXT: addi a1, a0, 256 ; LMULMAX4-NEXT: vse32.v v20, (a1) -; LMULMAX4-NEXT: addi a1, a0, 128 +; LMULMAX4-NEXT: addi a1, a0, 192 ; LMULMAX4-NEXT: vse32.v v16, (a1) -; LMULMAX4-NEXT: addi a1, a0, 64 +; LMULMAX4-NEXT: addi a1, a0, 128 ; LMULMAX4-NEXT: vse32.v v12, (a1) +; LMULMAX4-NEXT: addi a0, a0, 64 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: ret_split_v128i32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-NEXT: vle32.v v8, (a1) ; LMULMAX2-NEXT: addi a2, a1, 32 -; LMULMAX2-NEXT: vle32.v v10, (a2) +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX2-NEXT: vle32.v v8, (a2) ; LMULMAX2-NEXT: addi a2, a1, 64 -; LMULMAX2-NEXT: vle32.v v12, (a2) +; LMULMAX2-NEXT: vle32.v v10, (a2) ; LMULMAX2-NEXT: addi a2, a1, 96 -; LMULMAX2-NEXT: vle32.v v14, (a2) +; LMULMAX2-NEXT: vle32.v v12, (a2) ; LMULMAX2-NEXT: addi a2, a1, 128 -; LMULMAX2-NEXT: vle32.v v16, (a2) +; LMULMAX2-NEXT: vle32.v v14, (a2) ; LMULMAX2-NEXT: addi a2, a1, 160 -; LMULMAX2-NEXT: vle32.v v18, (a2) +; LMULMAX2-NEXT: vle32.v v16, (a2) ; LMULMAX2-NEXT: addi a2, a1, 192 -; LMULMAX2-NEXT: vle32.v v20, (a2) +; LMULMAX2-NEXT: vle32.v v18, (a2) ; LMULMAX2-NEXT: addi a2, a1, 224 -; LMULMAX2-NEXT: vle32.v v22, (a2) +; LMULMAX2-NEXT: vle32.v v20, (a2) ; LMULMAX2-NEXT: addi a2, a1, 256 -; LMULMAX2-NEXT: vle32.v v24, (a2) +; LMULMAX2-NEXT: vle32.v v22, (a2) ; LMULMAX2-NEXT: addi a2, a1, 288 -; LMULMAX2-NEXT: vle32.v v26, (a2) +; LMULMAX2-NEXT: vle32.v v24, (a2) ; LMULMAX2-NEXT: addi a2, a1, 320 -; LMULMAX2-NEXT: vle32.v v28, (a2) +; LMULMAX2-NEXT: vle32.v v26, (a2) ; LMULMAX2-NEXT: addi a2, a1, 352 -; LMULMAX2-NEXT: vle32.v v30, (a2) +; LMULMAX2-NEXT: vle32.v v28, (a2) ; LMULMAX2-NEXT: addi a2, a1, 384 -; LMULMAX2-NEXT: vle32.v v0, (a2) +; LMULMAX2-NEXT: vle32.v v30, (a2) ; LMULMAX2-NEXT: addi a2, a1, 416 -; LMULMAX2-NEXT: vle32.v v2, (a2) -; LMULMAX2-NEXT: addi a2, a1, 480 -; LMULMAX2-NEXT: vle32.v v4, (a2) -; LMULMAX2-NEXT: addi a1, a1, 448 -; LMULMAX2-NEXT: vle32.v v6, (a1) +; LMULMAX2-NEXT: vle32.v v0, (a2) +; LMULMAX2-NEXT: addi a2, a1, 448 +; LMULMAX2-NEXT: vle32.v v2, (a1) +; LMULMAX2-NEXT: addi a1, a1, 480 +; LMULMAX2-NEXT: vle32.v v4, (a1) +; LMULMAX2-NEXT: vle32.v v6, (a2) +; LMULMAX2-NEXT: vse32.v v2, (a0) ; LMULMAX2-NEXT: addi a1, a0, 480 ; LMULMAX2-NEXT: vse32.v v4, (a1) ; LMULMAX2-NEXT: addi a1, a0, 448 ; LMULMAX2-NEXT: vse32.v v6, (a1) ; LMULMAX2-NEXT: addi a1, a0, 416 -; LMULMAX2-NEXT: vse32.v v2, (a1) -; LMULMAX2-NEXT: addi a1, a0, 384 ; LMULMAX2-NEXT: vse32.v v0, (a1) -; LMULMAX2-NEXT: addi a1, a0, 352 +; LMULMAX2-NEXT: addi a1, a0, 384 ; LMULMAX2-NEXT: vse32.v v30, (a1) -; LMULMAX2-NEXT: addi a1, a0, 320 +; LMULMAX2-NEXT: addi a1, a0, 352 ; LMULMAX2-NEXT: vse32.v v28, (a1) -; LMULMAX2-NEXT: addi a1, a0, 288 +; LMULMAX2-NEXT: addi a1, a0, 320 ; LMULMAX2-NEXT: vse32.v v26, (a1) -; LMULMAX2-NEXT: addi a1, a0, 256 +; LMULMAX2-NEXT: addi a1, a0, 288 ; LMULMAX2-NEXT: vse32.v v24, (a1) -; LMULMAX2-NEXT: addi a1, a0, 224 +; LMULMAX2-NEXT: addi a1, a0, 256 ; LMULMAX2-NEXT: vse32.v v22, (a1) -; LMULMAX2-NEXT: addi a1, a0, 192 +; LMULMAX2-NEXT: addi a1, a0, 224 ; LMULMAX2-NEXT: vse32.v v20, (a1) -; LMULMAX2-NEXT: addi a1, a0, 160 +; LMULMAX2-NEXT: addi a1, a0, 192 ; LMULMAX2-NEXT: vse32.v v18, (a1) -; LMULMAX2-NEXT: addi a1, a0, 128 +; LMULMAX2-NEXT: addi a1, a0, 160 ; LMULMAX2-NEXT: vse32.v v16, (a1) -; LMULMAX2-NEXT: addi a1, a0, 96 +; LMULMAX2-NEXT: addi a1, a0, 128 ; LMULMAX2-NEXT: vse32.v v14, (a1) -; LMULMAX2-NEXT: addi a1, a0, 64 +; LMULMAX2-NEXT: addi a1, a0, 96 ; LMULMAX2-NEXT: vse32.v v12, (a1) -; LMULMAX2-NEXT: addi a1, a0, 32 +; LMULMAX2-NEXT: addi a1, a0, 64 ; LMULMAX2-NEXT: vse32.v v10, (a1) +; LMULMAX2-NEXT: addi a0, a0, 32 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: ret_split_v128i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vle32.v v8, (a1) ; LMULMAX1-NEXT: addi a2, a1, 16 -; LMULMAX1-NEXT: vle32.v v9, (a2) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vle32.v v8, (a2) ; LMULMAX1-NEXT: addi a2, a1, 32 -; LMULMAX1-NEXT: vle32.v v10, (a2) +; LMULMAX1-NEXT: vle32.v v9, (a2) ; LMULMAX1-NEXT: addi a2, a1, 48 -; LMULMAX1-NEXT: vle32.v v11, (a2) +; LMULMAX1-NEXT: vle32.v v10, (a2) ; LMULMAX1-NEXT: addi a2, a1, 64 -; LMULMAX1-NEXT: vle32.v v12, (a2) +; LMULMAX1-NEXT: vle32.v v11, (a2) ; LMULMAX1-NEXT: addi a2, a1, 80 -; LMULMAX1-NEXT: vle32.v v13, (a2) +; LMULMAX1-NEXT: vle32.v v12, (a2) ; LMULMAX1-NEXT: addi a2, a1, 96 -; LMULMAX1-NEXT: vle32.v v14, (a2) +; LMULMAX1-NEXT: vle32.v v13, (a2) ; LMULMAX1-NEXT: addi a2, a1, 112 -; LMULMAX1-NEXT: vle32.v v15, (a2) +; LMULMAX1-NEXT: vle32.v v14, (a2) ; LMULMAX1-NEXT: addi a2, a1, 128 -; LMULMAX1-NEXT: vle32.v v16, (a2) +; LMULMAX1-NEXT: vle32.v v15, (a2) ; LMULMAX1-NEXT: addi a2, a1, 144 -; LMULMAX1-NEXT: vle32.v v17, (a2) +; LMULMAX1-NEXT: vle32.v v16, (a2) ; LMULMAX1-NEXT: addi a2, a1, 160 -; LMULMAX1-NEXT: vle32.v v18, (a2) +; LMULMAX1-NEXT: vle32.v v17, (a2) ; LMULMAX1-NEXT: addi a2, a1, 176 -; LMULMAX1-NEXT: vle32.v v19, (a2) +; LMULMAX1-NEXT: vle32.v v18, (a2) ; LMULMAX1-NEXT: addi a2, a1, 192 -; LMULMAX1-NEXT: vle32.v v20, (a2) +; LMULMAX1-NEXT: vle32.v v19, (a2) ; LMULMAX1-NEXT: addi a2, a1, 208 -; LMULMAX1-NEXT: vle32.v v21, (a2) +; LMULMAX1-NEXT: vle32.v v20, (a2) ; LMULMAX1-NEXT: addi a2, a1, 224 -; LMULMAX1-NEXT: vle32.v v22, (a2) +; LMULMAX1-NEXT: vle32.v v21, (a2) ; LMULMAX1-NEXT: addi a2, a1, 240 -; LMULMAX1-NEXT: vle32.v v23, (a2) +; LMULMAX1-NEXT: vle32.v v22, (a2) ; LMULMAX1-NEXT: addi a2, a1, 256 -; LMULMAX1-NEXT: vle32.v v24, (a2) +; LMULMAX1-NEXT: vle32.v v23, (a2) ; LMULMAX1-NEXT: addi a2, a1, 272 -; LMULMAX1-NEXT: vle32.v v25, (a2) +; LMULMAX1-NEXT: vle32.v v24, (a2) ; LMULMAX1-NEXT: addi a2, a1, 288 -; LMULMAX1-NEXT: vle32.v v26, (a2) +; LMULMAX1-NEXT: vle32.v v25, (a2) ; LMULMAX1-NEXT: addi a2, a1, 304 -; LMULMAX1-NEXT: vle32.v v27, (a2) +; LMULMAX1-NEXT: vle32.v v26, (a2) ; LMULMAX1-NEXT: addi a2, a1, 320 -; LMULMAX1-NEXT: vle32.v v28, (a2) +; LMULMAX1-NEXT: vle32.v v27, (a2) ; LMULMAX1-NEXT: addi a2, a1, 336 -; LMULMAX1-NEXT: vle32.v v29, (a2) +; LMULMAX1-NEXT: vle32.v v28, (a2) ; LMULMAX1-NEXT: addi a2, a1, 352 -; LMULMAX1-NEXT: vle32.v v30, (a2) +; LMULMAX1-NEXT: vle32.v v29, (a2) ; LMULMAX1-NEXT: addi a2, a1, 368 -; LMULMAX1-NEXT: vle32.v v31, (a2) +; LMULMAX1-NEXT: vle32.v v30, (a2) ; LMULMAX1-NEXT: addi a2, a1, 384 -; LMULMAX1-NEXT: vle32.v v0, (a2) +; LMULMAX1-NEXT: vle32.v v31, (a2) ; LMULMAX1-NEXT: addi a2, a1, 400 -; LMULMAX1-NEXT: vle32.v v1, (a2) +; LMULMAX1-NEXT: vle32.v v0, (a2) ; LMULMAX1-NEXT: addi a2, a1, 416 -; LMULMAX1-NEXT: vle32.v v2, (a2) +; LMULMAX1-NEXT: vle32.v v1, (a2) ; LMULMAX1-NEXT: addi a2, a1, 432 -; LMULMAX1-NEXT: vle32.v v3, (a2) +; LMULMAX1-NEXT: vle32.v v2, (a2) ; LMULMAX1-NEXT: addi a2, a1, 448 -; LMULMAX1-NEXT: vle32.v v4, (a2) +; LMULMAX1-NEXT: vle32.v v3, (a2) ; LMULMAX1-NEXT: addi a2, a1, 464 -; LMULMAX1-NEXT: vle32.v v5, (a2) -; LMULMAX1-NEXT: addi a2, a1, 496 -; LMULMAX1-NEXT: vle32.v v6, (a2) -; LMULMAX1-NEXT: addi a1, a1, 480 -; LMULMAX1-NEXT: vle32.v v7, (a1) +; LMULMAX1-NEXT: vle32.v v4, (a2) +; LMULMAX1-NEXT: addi a2, a1, 480 +; LMULMAX1-NEXT: vle32.v v5, (a1) +; LMULMAX1-NEXT: addi a1, a1, 496 +; LMULMAX1-NEXT: vle32.v v6, (a1) +; LMULMAX1-NEXT: vle32.v v7, (a2) +; LMULMAX1-NEXT: vse32.v v5, (a0) ; LMULMAX1-NEXT: addi a1, a0, 496 ; LMULMAX1-NEXT: vse32.v v6, (a1) ; LMULMAX1-NEXT: addi a1, a0, 480 ; LMULMAX1-NEXT: vse32.v v7, (a1) ; LMULMAX1-NEXT: addi a1, a0, 464 -; LMULMAX1-NEXT: vse32.v v5, (a1) -; LMULMAX1-NEXT: addi a1, a0, 448 ; LMULMAX1-NEXT: vse32.v v4, (a1) -; LMULMAX1-NEXT: addi a1, a0, 432 +; LMULMAX1-NEXT: addi a1, a0, 448 ; LMULMAX1-NEXT: vse32.v v3, (a1) -; LMULMAX1-NEXT: addi a1, a0, 416 +; LMULMAX1-NEXT: addi a1, a0, 432 ; LMULMAX1-NEXT: vse32.v v2, (a1) -; LMULMAX1-NEXT: addi a1, a0, 400 +; LMULMAX1-NEXT: addi a1, a0, 416 ; LMULMAX1-NEXT: vse32.v v1, (a1) -; LMULMAX1-NEXT: addi a1, a0, 384 +; LMULMAX1-NEXT: addi a1, a0, 400 ; LMULMAX1-NEXT: vse32.v v0, (a1) -; LMULMAX1-NEXT: addi a1, a0, 368 +; LMULMAX1-NEXT: addi a1, a0, 384 ; LMULMAX1-NEXT: vse32.v v31, (a1) -; LMULMAX1-NEXT: addi a1, a0, 352 +; LMULMAX1-NEXT: addi a1, a0, 368 ; LMULMAX1-NEXT: vse32.v v30, (a1) -; LMULMAX1-NEXT: addi a1, a0, 336 +; LMULMAX1-NEXT: addi a1, a0, 352 ; LMULMAX1-NEXT: vse32.v v29, (a1) -; LMULMAX1-NEXT: addi a1, a0, 320 +; LMULMAX1-NEXT: addi a1, a0, 336 ; LMULMAX1-NEXT: vse32.v v28, (a1) -; LMULMAX1-NEXT: addi a1, a0, 304 +; LMULMAX1-NEXT: addi a1, a0, 320 ; LMULMAX1-NEXT: vse32.v v27, (a1) -; LMULMAX1-NEXT: addi a1, a0, 288 +; LMULMAX1-NEXT: addi a1, a0, 304 ; LMULMAX1-NEXT: vse32.v v26, (a1) -; LMULMAX1-NEXT: addi a1, a0, 272 +; LMULMAX1-NEXT: addi a1, a0, 288 ; LMULMAX1-NEXT: vse32.v v25, (a1) -; LMULMAX1-NEXT: addi a1, a0, 256 +; LMULMAX1-NEXT: addi a1, a0, 272 ; LMULMAX1-NEXT: vse32.v v24, (a1) -; LMULMAX1-NEXT: addi a1, a0, 240 +; LMULMAX1-NEXT: addi a1, a0, 256 ; LMULMAX1-NEXT: vse32.v v23, (a1) -; LMULMAX1-NEXT: addi a1, a0, 224 +; LMULMAX1-NEXT: addi a1, a0, 240 ; LMULMAX1-NEXT: vse32.v v22, (a1) -; LMULMAX1-NEXT: addi a1, a0, 208 +; LMULMAX1-NEXT: addi a1, a0, 224 ; LMULMAX1-NEXT: vse32.v v21, (a1) -; LMULMAX1-NEXT: addi a1, a0, 192 +; LMULMAX1-NEXT: addi a1, a0, 208 ; LMULMAX1-NEXT: vse32.v v20, (a1) -; LMULMAX1-NEXT: addi a1, a0, 176 +; LMULMAX1-NEXT: addi a1, a0, 192 ; LMULMAX1-NEXT: vse32.v v19, (a1) -; LMULMAX1-NEXT: addi a1, a0, 160 +; LMULMAX1-NEXT: addi a1, a0, 176 ; LMULMAX1-NEXT: vse32.v v18, (a1) -; LMULMAX1-NEXT: addi a1, a0, 144 +; LMULMAX1-NEXT: addi a1, a0, 160 ; LMULMAX1-NEXT: vse32.v v17, (a1) -; LMULMAX1-NEXT: addi a1, a0, 128 +; LMULMAX1-NEXT: addi a1, a0, 144 ; LMULMAX1-NEXT: vse32.v v16, (a1) -; LMULMAX1-NEXT: addi a1, a0, 112 +; LMULMAX1-NEXT: addi a1, a0, 128 ; LMULMAX1-NEXT: vse32.v v15, (a1) -; LMULMAX1-NEXT: addi a1, a0, 96 +; LMULMAX1-NEXT: addi a1, a0, 112 ; LMULMAX1-NEXT: vse32.v v14, (a1) -; LMULMAX1-NEXT: addi a1, a0, 80 +; LMULMAX1-NEXT: addi a1, a0, 96 ; LMULMAX1-NEXT: vse32.v v13, (a1) -; LMULMAX1-NEXT: addi a1, a0, 64 +; LMULMAX1-NEXT: addi a1, a0, 80 ; LMULMAX1-NEXT: vse32.v v12, (a1) -; LMULMAX1-NEXT: addi a1, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 64 ; LMULMAX1-NEXT: vse32.v v11, (a1) -; LMULMAX1-NEXT: addi a1, a0, 32 +; LMULMAX1-NEXT: addi a1, a0, 48 ; LMULMAX1-NEXT: vse32.v v10, (a1) -; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a1, a0, 32 ; LMULMAX1-NEXT: vse32.v v9, (a1) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %v = load <128 x i32>, <128 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \ +; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \ +; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s + +define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) { +; CHECK-LABEL: ret_split_v128i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a2, a1, 448 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 448 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 384 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 320 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 320 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 256 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 192 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 192 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a2, a1, 128 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: vse32.v v8, (a2) +; CHECK-NEXT: addi a1, a1, 64 +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: addi a0, a0, 64 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %v = load <128 x i32>, <128 x i32>* %x + ret <128 x i32> %v +} diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -842,10 +842,10 @@ ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: sh s1, 4(s0) ; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -919,10 +919,10 @@ ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: sh s1, 4(s0) ; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1018,9 +1018,9 @@ ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh s3, 2(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1090,9 +1090,9 @@ ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh s3, 2(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1321,10 +1321,10 @@ ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sd zero, 0(s0) ; RV64I-NEXT: sd a0, 24(s0) ; RV64I-NEXT: sd s1, 16(s0) ; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: sd zero, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -799,10 +799,10 @@ ; RV32I-NEXT: addi a1, a0, 1327 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: sh s1, 4(s0) ; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -869,10 +869,10 @@ ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: sh s1, 4(s0) ; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1116,10 +1116,10 @@ ; RV64I-NEXT: addiw a1, a0, 1327 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sd zero, 0(s0) ; RV64I-NEXT: sd a0, 24(s0) ; RV64I-NEXT: sd s1, 16(s0) ; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: sd zero, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll --- a/llvm/test/CodeGen/WebAssembly/multi-return.ll +++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll @@ -86,10 +86,10 @@ ; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) ; CHECK: i64.load $push2=, 16($[[SP]]) ; CHECK: i64.store 8($0), $pop2 +; CHECK: i64.store 0($0), $[[L2]] ; CHECK: i32.const $push12=, 16 ; CHECK: i32.add $push3=, $0, $pop12 ; CHECK: i64.store 0($pop3), $[[L1]] -; CHECK: i64.store 0($0), $[[L2]] %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 @@ -101,20 +101,20 @@ define { i128, i128 } @test6() { ; CHECK-LABEL: test6 ; CHECK: call return_multi_multi -; CHECK: i32.const $push0=, 24 +; CHECK: i32.const $push0=, 64 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) -; CHECK: i32.const $push2=, 64 +; CHECK: i32.const $push2=, 24 ; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) ; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]]) ; CHECK: i64.load $push4=, 56($[[SP]]) ; CHECK: i64.store 16($0), $pop4 +; CHECK: i64.store 0($0), $[[L3]] +; CHECK: i64.store 8($0), $[[L2]] ; CHECK: i32.const $push5=, 24 ; CHECK: i32.add $push6=, $0, $pop5 -; CHECK: i64.store 0($pop6), $[[L2]] -; CHECK: i64.store 0($0), $[[L3]] -; CHECK: i64.store 8($0), $[[L1]] +; CHECK: i64.store 0($pop6), $[[L1]] %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 %r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3 @@ -126,22 +126,22 @@ define { i64, i192 } @test7() { ; CHECK-LABEL: test7 ; CHECK: call return_multi_multi -; CHECK: i32.const $push2=, 40 -; CHECK: i32.add $push3=, $[[SP:[0-9]+]], $pop2 -; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop3) -; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) -; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]]) -; CHECK: i32.const $push0=, 24 -; CHECK: i32.add $push1=, $0, $pop0 -; CHECK: i32.const $push4=, 48 -; CHECK: i32.add $push5=, $[[SP]], $pop4 -; CHECK: i64.load $push6=, 0($pop5) -; CHECK: i64.store 0($pop1), $pop6 -; CHECK: i64.store 8($0), $[[L3]] +; CHECK: i32.const $push0=, 40 +; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 +; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) +; CHECK: i32.const $push2=, 48 +; CHECK: i32.add $push3=, $[[SP]], $pop2 +; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) +; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]]) +; CHECK: i64.load $push4=, 32($[[SP]]) +; CHECK: i64.store 8($0), $pop4 +; CHECK: i64.store 0($0), $[[L3]] +; CHECK: i32.const $push5=, 24 +; CHECK: i32.add $push6=, $0, $pop5 +; CHECK: i64.store 0($pop6), $[[L2]] ; CHECK: i32.const $push7=, 16 ; CHECK: i32.add $push8=, $0, $pop7 ; CHECK: i64.store 0($pop8), $[[L1]] -; CHECK: i64.store 0($0), $[[L2]] %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2 @@ -153,18 +153,18 @@ define { i128, i192, i128, i64 } @test8() { ; CHECK-LABEL: test8 ; CHECK: call return_multi_multi -; CHECK: i32.const $push0=, 64 -; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 -; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) ; CHECK: i32.const $push20=, 8 -; CHECK: i32.add $push21=, $[[SP]], $pop20 -; CHECK: i32.const $push2=, 32 -; CHECK: i32.add $push3=, $pop21, $pop2 +; CHECK: i32.add $push21=, $[[SP:[0-9]+]], $pop20 +; CHECK: i32.const $push0=, 32 +; CHECK: i32.add $push1=, $pop21, $pop0 +; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) +; CHECK: i32.const $push2=, 48 +; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) -; CHECK: i32.const $push4=, 48 +; CHECK: i32.const $push4=, 24 ; CHECK: i32.add $push5=, $[[SP]], $pop4 ; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5) -; CHECK: i32.const $push6=, 24 +; CHECK: i32.const $push6=, 64 ; CHECK: i32.add $push7=, $[[SP]], $pop6 ; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7) ; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]]) @@ -172,19 +172,19 @@ ; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]]) ; CHECK: i64.load $push8=, 16($[[SP]]) ; CHECK: i64.store 40($0), $pop8 +; CHECK: i64.store 16($0), $[[L7]] +; CHECK: i64.store 0($0), $[[L6]] +; CHECK: i64.store 8($0), $[[L4]] +; CHECK: i64.store 56($0), $[[L5]] ; CHECK: i32.const $push9=, 48 ; CHECK: i32.add $push10=, $0, $pop9 -; CHECK: i64.store 0($pop10), $[[L4]] +; CHECK: i64.store 0($pop10), $[[L3]] ; CHECK: i32.const $push22=, 32 ; CHECK: i32.add $push11=, $0, $pop22 -; CHECK: i64.store 0($pop11), $[[L3]] -; CHECK: i64.store 16($0), $[[L7]] +; CHECK: i64.store 0($pop11), $[[L2]] ; CHECK: i32.const $push12=, 24 ; CHECK: i32.add $push13=, $0, $pop12 -; CHECK: i64.store 0($pop13), $[[L2]] -; CHECK: i64.store 0($0), $[[L6]] -; CHECK: i64.store 8($0), $[[L1]] -; CHECK: i64.store 56($0), $[[L5]] +; CHECK: i64.store 0($pop13), $[[L1]] %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -313,14 +313,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.convert_low_i32x4_s -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.convert_low_i32x4_s ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.convert_low_i32x4_s +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> %a = sitofp <4 x i32> %v to <4 x double> @@ -333,14 +333,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.convert_low_i32x4_u -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.convert_low_i32x4_u ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.convert_low_i32x4_u +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> %a = uitofp <4 x i32> %v to <4 x double> @@ -354,14 +354,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.convert_low_i32x4_s -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.convert_low_i32x4_s ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.convert_low_i32x4_s +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = sitofp <8 x i32> %x to <8 x double> %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> @@ -374,14 +374,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.convert_low_i32x4_u -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.convert_low_i32x4_u ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.convert_low_i32x4_u +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = uitofp <8 x i32> %x to <8 x double> %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> @@ -394,14 +394,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.promote_low_f32x4 -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.promote_low_f32x4 ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.promote_low_f32x4 +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> %a = fpext <4 x float> %v to <4 x double> @@ -414,14 +414,14 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f64x2.promote_low_f32x4 -; CHECK-NEXT: v128.store 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: f64x2.promote_low_f32x4 ; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: f64x2.promote_low_f32x4 +; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: # fallthrough-return %v = fpext <8 x float> %x to <8 x double> %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -51,9 +51,9 @@ ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8 +; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %rdx, 8(%rdi) ; CHECK-NEXT: movq %rsi, (%rdi) -; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %r8, 24(%rdi) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1225,9 +1225,9 @@ ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, 8(%eax) ; X32-NEXT: movl %edx, 4(%eax) ; X32-NEXT: movl %ecx, (%eax) -; X32-NEXT: movl %esi, 8(%eax) ; X32-NEXT: movl %edi, 12(%eax) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll --- a/llvm/test/CodeGen/X86/i128-add.ll +++ b/llvm/test/CodeGen/X86/i128-add.ll @@ -20,8 +20,8 @@ ; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi @@ -61,8 +61,8 @@ ; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %edx, 8(%eax) ; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll --- a/llvm/test/CodeGen/X86/masked_load.ll +++ b/llvm/test/CodeGen/X86/masked_load.ll @@ -7179,12 +7179,12 @@ ; SSE2-NEXT: movsd {{.*#+}} xmm8 = mem[0],zero ; SSE2-NEXT: movlhps {{.*#+}} xmm6 = xmm6[0],xmm8[0] ; SSE2-NEXT: movaps %xmm7, 112(%rdi) +; SSE2-NEXT: movaps %xmm5, 80(%rdi) ; SSE2-NEXT: movaps %xmm4, 64(%rdi) ; SSE2-NEXT: movaps %xmm3, 48(%rdi) ; SSE2-NEXT: movaps %xmm2, 32(%rdi) -; SSE2-NEXT: movaps %xmm0, (%rdi) -; SSE2-NEXT: movaps %xmm5, 80(%rdi) ; SSE2-NEXT: movaps %xmm1, 16(%rdi) +; SSE2-NEXT: movaps %xmm0, (%rdi) ; SSE2-NEXT: movaps %xmm6, 96(%rdi) ; SSE2-NEXT: retq ; @@ -7195,13 +7195,13 @@ ; SSE42-NEXT: pinsrq $0, 80(%rsi), %xmm5 ; SSE42-NEXT: pinsrq $1, 104(%rsi), %xmm6 ; SSE42-NEXT: movaps %xmm7, 112(%rdi) +; SSE42-NEXT: movdqa %xmm6, 96(%rdi) +; SSE42-NEXT: movdqa %xmm5, 80(%rdi) ; SSE42-NEXT: movaps %xmm4, 64(%rdi) ; SSE42-NEXT: movaps %xmm3, 48(%rdi) ; SSE42-NEXT: movaps %xmm2, 32(%rdi) -; SSE42-NEXT: movaps %xmm0, (%rdi) -; SSE42-NEXT: movdqa %xmm6, 96(%rdi) -; SSE42-NEXT: movdqa %xmm5, 80(%rdi) ; SSE42-NEXT: movdqa %xmm1, 16(%rdi) +; SSE42-NEXT: movaps %xmm0, (%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: load_one_mask_bit_set6: diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll --- a/llvm/test/CodeGen/X86/mul128.ll +++ b/llvm/test/CodeGen/X86/mul128.ll @@ -52,13 +52,13 @@ ; X86-NEXT: imull %esi, %ecx ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: addl %ebx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: adcl %edi, %ecx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ebx @@ -77,12 +77,12 @@ ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movzbl %bl, %esi ; X86-NEXT: adcl %esi, %edx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%esp), %esi # 4-byte Reload -; X86-NEXT: movl %esi, (%ecx) ; X86-NEXT: movl %ebp, 4(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, (%ecx) ; X86-NEXT: movl %eax, 8(%ecx) ; X86-NEXT: movl %edx, 12(%ecx) ; X86-NEXT: movl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll --- a/llvm/test/CodeGen/X86/pmulh.ll +++ b/llvm/test/CodeGen/X86/pmulh.ll @@ -1279,29 +1279,29 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmulhw %xmm4, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm4, %xmm4 -; SSE41-NEXT: pmulhw %xmm5, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm5, %xmm5 -; SSE41-NEXT: pmulhw %xmm6, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm6, %xmm6 -; SSE41-NEXT: pmulhw %xmm7, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm7, %xmm7 +; SSE41-NEXT: pmovsxwd %xmm0, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE41-NEXT: pmulhw %xmm5, %xmm1 +; SSE41-NEXT: pmovsxwd %xmm1, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE41-NEXT: pmulhw %xmm6, %xmm2 +; SSE41-NEXT: pmovsxwd %xmm2, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm2, %xmm2 +; SSE41-NEXT: pmulhw %xmm7, %xmm3 +; SSE41-NEXT: pmovsxwd %xmm3, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm3, %xmm3 -; SSE41-NEXT: movdqa %xmm3, 96(%rdi) -; SSE41-NEXT: movdqa %xmm2, 64(%rdi) -; SSE41-NEXT: movdqa %xmm1, 32(%rdi) -; SSE41-NEXT: movdqa %xmm0, (%rdi) -; SSE41-NEXT: movdqa %xmm7, 112(%rdi) -; SSE41-NEXT: movdqa %xmm6, 80(%rdi) -; SSE41-NEXT: movdqa %xmm5, 48(%rdi) -; SSE41-NEXT: movdqa %xmm4, 16(%rdi) +; SSE41-NEXT: movdqa %xmm3, 112(%rdi) +; SSE41-NEXT: movdqa %xmm7, 96(%rdi) +; SSE41-NEXT: movdqa %xmm2, 80(%rdi) +; SSE41-NEXT: movdqa %xmm6, 64(%rdi) +; SSE41-NEXT: movdqa %xmm1, 48(%rdi) +; SSE41-NEXT: movdqa %xmm5, 32(%rdi) +; SSE41-NEXT: movdqa %xmm0, 16(%rdi) +; SSE41-NEXT: movdqa %xmm4, (%rdi) ; SSE41-NEXT: retq ; ; AVX2-LABEL: mulhsw_v32i16_ashr: @@ -1770,53 +1770,53 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: movq %rdi, %rax ; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm8, %xmm8 +; SSE41-NEXT: pmovsxwd %xmm0, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE41-NEXT: pmovsxwd %xmm0, %xmm9 ; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm1[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm9, %xmm9 +; SSE41-NEXT: pmovsxwd %xmm1, %xmm10 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE41-NEXT: pmovsxwd %xmm0, %xmm11 ; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm10, %xmm10 +; SSE41-NEXT: pmovsxwd %xmm2, %xmm12 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; SSE41-NEXT: pmovsxwd %xmm2, %xmm13 ; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm3[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm11, %xmm11 +; SSE41-NEXT: pmovsxwd %xmm3, %xmm14 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] +; SSE41-NEXT: pmovsxwd %xmm3, %xmm15 ; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm4[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm12, %xmm12 -; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5 -; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm5[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm13, %xmm13 -; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6 -; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm6[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm14, %xmm14 -; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm15 = xmm7[2,3,2,3] -; SSE41-NEXT: pmovsxwd %xmm15, %xmm15 -; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 -; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 -; SSE41-NEXT: pmovsxwd %xmm2, %xmm2 -; SSE41-NEXT: pmovsxwd %xmm3, %xmm3 +; SSE41-NEXT: pmovsxwd %xmm4, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm4, %xmm4 +; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm5 +; SSE41-NEXT: pmovsxwd %xmm5, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm5, %xmm5 +; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm6 +; SSE41-NEXT: pmovsxwd %xmm6, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm6, %xmm6 +; SSE41-NEXT: pmulhw {{[0-9]+}}(%rsp), %xmm7 +; SSE41-NEXT: pmovsxwd %xmm7, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3] ; SSE41-NEXT: pmovsxwd %xmm7, %xmm7 -; SSE41-NEXT: movdqa %xmm7, 224(%rdi) -; SSE41-NEXT: movdqa %xmm6, 192(%rdi) -; SSE41-NEXT: movdqa %xmm5, 160(%rdi) -; SSE41-NEXT: movdqa %xmm4, 128(%rdi) -; SSE41-NEXT: movdqa %xmm3, 96(%rdi) -; SSE41-NEXT: movdqa %xmm2, 64(%rdi) -; SSE41-NEXT: movdqa %xmm1, 32(%rdi) -; SSE41-NEXT: movdqa %xmm0, (%rdi) -; SSE41-NEXT: movdqa %xmm15, 240(%rdi) -; SSE41-NEXT: movdqa %xmm14, 208(%rdi) -; SSE41-NEXT: movdqa %xmm13, 176(%rdi) -; SSE41-NEXT: movdqa %xmm12, 144(%rdi) -; SSE41-NEXT: movdqa %xmm11, 112(%rdi) -; SSE41-NEXT: movdqa %xmm10, 80(%rdi) -; SSE41-NEXT: movdqa %xmm9, 48(%rdi) -; SSE41-NEXT: movdqa %xmm8, 16(%rdi) +; SSE41-NEXT: movdqa %xmm7, 240(%rdi) +; SSE41-NEXT: movdqa %xmm3, 224(%rdi) +; SSE41-NEXT: movdqa %xmm6, 208(%rdi) +; SSE41-NEXT: movdqa %xmm1, 192(%rdi) +; SSE41-NEXT: movdqa %xmm5, 176(%rdi) +; SSE41-NEXT: movdqa %xmm2, 160(%rdi) +; SSE41-NEXT: movdqa %xmm4, 144(%rdi) +; SSE41-NEXT: movdqa %xmm0, 128(%rdi) +; SSE41-NEXT: movdqa %xmm15, 112(%rdi) +; SSE41-NEXT: movdqa %xmm14, 96(%rdi) +; SSE41-NEXT: movdqa %xmm13, 80(%rdi) +; SSE41-NEXT: movdqa %xmm12, 64(%rdi) +; SSE41-NEXT: movdqa %xmm11, 48(%rdi) +; SSE41-NEXT: movdqa %xmm10, 32(%rdi) +; SSE41-NEXT: movdqa %xmm9, 16(%rdi) +; SSE41-NEXT: movdqa %xmm8, (%rdi) ; SSE41-NEXT: retq ; ; AVX2-LABEL: mulhsw_v64i16_ashr: diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -18,42 +18,40 @@ ; ILP-NEXT: xorl %r8d, %r8d ; ILP-NEXT: addl %esi, %esi ; ILP-NEXT: leal 3(%rsi), %r9d -; ILP-NEXT: movb $125, %r10b -; ILP-NEXT: movl $1, %edi -; ILP-NEXT: xorl %r11d, %r11d +; ILP-NEXT: movl $1, %r11d +; ILP-NEXT: xorl %r14d, %r14d ; ILP-NEXT: movl %r9d, %ecx -; ILP-NEXT: shldq %cl, %rdi, %r11 -; ILP-NEXT: subb %sil, %r10b -; ILP-NEXT: addb $-125, %sil -; ILP-NEXT: xorl %ebx, %ebx -; ILP-NEXT: movl %esi, %ecx -; ILP-NEXT: shldq %cl, %rdi, %rbx +; ILP-NEXT: shldq %cl, %r11, %r14 ; ILP-NEXT: movl $1, %edx ; ILP-NEXT: shlq %cl, %rdx -; ILP-NEXT: movl $1, %r14d +; ILP-NEXT: leal -125(%rsi), %r10d +; ILP-NEXT: xorl %ebx, %ebx ; ILP-NEXT: movl %r10d, %ecx -; ILP-NEXT: shrdq %cl, %r8, %r14 -; ILP-NEXT: movl %r9d, %ecx -; ILP-NEXT: shlq %cl, %rdi +; ILP-NEXT: shldq %cl, %r11, %rbx ; ILP-NEXT: testb $64, %r9b -; ILP-NEXT: cmovneq %rdi, %r11 -; ILP-NEXT: cmovneq %r8, %rdi -; ILP-NEXT: testb $64, %r10b -; ILP-NEXT: cmovneq %r8, %r14 -; ILP-NEXT: testb $64, %sil -; ILP-NEXT: cmovneq %rdx, %rbx +; ILP-NEXT: cmovneq %rdx, %r14 ; ILP-NEXT: cmovneq %r8, %rdx +; ILP-NEXT: movl $1, %edi +; ILP-NEXT: shlq %cl, %rdi +; ILP-NEXT: movb $125, %cl +; ILP-NEXT: subb %sil, %cl +; ILP-NEXT: shrdq %cl, %r8, %r11 +; ILP-NEXT: testb $64, %cl +; ILP-NEXT: cmovneq %r8, %r11 +; ILP-NEXT: testb $64, %r10b +; ILP-NEXT: cmovneq %rdi, %rbx +; ILP-NEXT: cmovneq %r8, %rdi ; ILP-NEXT: testb %r9b, %r9b -; ILP-NEXT: cmovsq %r8, %r11 -; ILP-NEXT: cmovsq %r8, %rdi -; ILP-NEXT: movq %r11, 8(%rax) -; ILP-NEXT: movq %rdi, (%rax) +; ILP-NEXT: cmovsq %r8, %r14 +; ILP-NEXT: cmovsq %r8, %rdx +; ILP-NEXT: movq %r14, 8(%rax) +; ILP-NEXT: movq %rdx, (%rax) ; ILP-NEXT: cmovnsq %r8, %rbx ; ILP-NEXT: cmoveq %r8, %rbx ; ILP-NEXT: movq %rbx, 24(%rax) -; ILP-NEXT: cmovnsq %r14, %rdx -; ILP-NEXT: cmoveq %r8, %rdx -; ILP-NEXT: movq %rdx, 16(%rax) +; ILP-NEXT: cmovnsq %r11, %rdi +; ILP-NEXT: cmoveq %r8, %rdi +; ILP-NEXT: movq %rdi, 16(%rax) ; ILP-NEXT: popq %rbx ; ILP-NEXT: popq %r14 ; ILP-NEXT: retq @@ -252,27 +250,24 @@ ; ILP-LABEL: test2: ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %edi, %edi +; ILP-NEXT: xorl %r9d, %r9d ; ILP-NEXT: movq %rsi, %r11 ; ILP-NEXT: negq %r11 ; ILP-NEXT: movl $0, %r10d ; ILP-NEXT: sbbq %rdx, %r10 -; ILP-NEXT: movl $0, %r9d -; ILP-NEXT: sbbq %rcx, %r9 -; ILP-NEXT: sbbq %r8, %rdi -; ILP-NEXT: andq %rcx, %r9 -; ILP-NEXT: bsrq %r9, %rcx -; ILP-NEXT: xorq $63, %rcx -; ILP-NEXT: andq %r8, %rdi -; ILP-NEXT: bsrq %rdi, %r8 +; ILP-NEXT: movl $0, %edi +; ILP-NEXT: sbbq %rcx, %rdi +; ILP-NEXT: sbbq %r8, %r9 +; ILP-NEXT: andq %r8, %r9 +; ILP-NEXT: bsrq %r9, %r8 ; ILP-NEXT: andq %rdx, %r10 ; ILP-NEXT: bsrq %r10, %rdx ; ILP-NEXT: xorq $63, %r8 +; ILP-NEXT: andq %rcx, %rdi +; ILP-NEXT: bsrq %rdi, %rcx +; ILP-NEXT: xorq $63, %rcx ; ILP-NEXT: addq $64, %rcx -; ILP-NEXT: testq %rdi, %rdi -; ILP-NEXT: movq $0, 24(%rax) -; ILP-NEXT: movq $0, 16(%rax) -; ILP-NEXT: movq $0, 8(%rax) +; ILP-NEXT: testq %r9, %r9 ; ILP-NEXT: cmovneq %r8, %rcx ; ILP-NEXT: xorq $63, %rdx ; ILP-NEXT: andq %rsi, %r11 @@ -287,6 +282,9 @@ ; ILP-NEXT: orq %r9, %rdi ; ILP-NEXT: cmovneq %rcx, %rsi ; ILP-NEXT: movq %rsi, (%rax) +; ILP-NEXT: movq $0, 24(%rax) +; ILP-NEXT: movq $0, 16(%rax) +; ILP-NEXT: movq $0, 8(%rax) ; ILP-NEXT: retq ; ; HYBRID-LABEL: test2: @@ -457,46 +455,48 @@ define i256 @test3(i256 %n) nounwind { ; ILP-LABEL: test3: ; ILP: # %bb.0: +; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r10d, %r10d +; ILP-NEXT: xorl %edi, %edi ; ILP-NEXT: movq %rsi, %r9 ; ILP-NEXT: negq %r9 +; ILP-NEXT: movl $0, %r10d +; ILP-NEXT: sbbq %rdx, %r10 ; ILP-NEXT: movl $0, %r11d -; ILP-NEXT: sbbq %rdx, %r11 -; ILP-NEXT: movl $0, %edi -; ILP-NEXT: sbbq %rcx, %rdi -; ILP-NEXT: sbbq %r8, %r10 +; ILP-NEXT: sbbq %rcx, %r11 +; ILP-NEXT: sbbq %r8, %rdi +; ILP-NEXT: notq %r8 +; ILP-NEXT: andq %rdi, %r8 +; ILP-NEXT: bsrq %r8, %rbx +; ILP-NEXT: notq %rdx +; ILP-NEXT: andq %r10, %rdx +; ILP-NEXT: bsrq %rdx, %r10 +; ILP-NEXT: notq %rsi +; ILP-NEXT: xorq $63, %rbx ; ILP-NEXT: notq %rcx -; ILP-NEXT: andq %rdi, %rcx +; ILP-NEXT: andq %r11, %rcx ; ILP-NEXT: bsrq %rcx, %rdi -; ILP-NEXT: notq %rdx -; ILP-NEXT: andq %r11, %rdx ; ILP-NEXT: xorq $63, %rdi -; ILP-NEXT: notq %r8 -; ILP-NEXT: andq %r10, %r8 -; ILP-NEXT: bsrq %r8, %r10 -; ILP-NEXT: xorq $63, %r10 ; ILP-NEXT: addq $64, %rdi -; ILP-NEXT: bsrq %rdx, %r11 -; ILP-NEXT: notq %rsi ; ILP-NEXT: testq %r8, %r8 -; ILP-NEXT: movq $0, 24(%rax) -; ILP-NEXT: movq $0, 16(%rax) -; ILP-NEXT: movq $0, 8(%rax) -; ILP-NEXT: cmovneq %r10, %rdi -; ILP-NEXT: xorq $63, %r11 +; ILP-NEXT: cmovneq %rbx, %rdi +; ILP-NEXT: xorq $63, %r10 ; ILP-NEXT: andq %r9, %rsi -; ILP-NEXT: movl $127, %r9d +; ILP-NEXT: movl $127, %ebx ; ILP-NEXT: bsrq %rsi, %rsi -; ILP-NEXT: cmoveq %r9, %rsi +; ILP-NEXT: cmoveq %rbx, %rsi ; ILP-NEXT: xorq $63, %rsi ; ILP-NEXT: addq $64, %rsi ; ILP-NEXT: testq %rdx, %rdx -; ILP-NEXT: cmovneq %r11, %rsi +; ILP-NEXT: cmovneq %r10, %rsi ; ILP-NEXT: subq $-128, %rsi -; ILP-NEXT: orq %rcx, %r8 +; ILP-NEXT: orq %r8, %rcx ; ILP-NEXT: cmovneq %rdi, %rsi ; ILP-NEXT: movq %rsi, (%rax) +; ILP-NEXT: movq $0, 24(%rax) +; ILP-NEXT: movq $0, 16(%rax) +; ILP-NEXT: movq $0, 8(%rax) +; ILP-NEXT: popq %rbx ; ILP-NEXT: retq ; ; HYBRID-LABEL: test3: diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -25,9 +25,9 @@ ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 +; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %rdx, 8(%rdi) ; CHECK-NEXT: movq %rsi, (%rdi) -; CHECK-NEXT: movq %rcx, 16(%rdi) ; CHECK-NEXT: movq %r8, 24(%rdi) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll --- a/llvm/test/CodeGen/X86/umul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll @@ -494,10 +494,10 @@ ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, (%edx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 4(%edx) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 8(%edx) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 12(%edx) @@ -530,17 +530,17 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r8, %r11 -; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %rcx, %r10 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rdi, %r12 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r14, %rdi @@ -559,22 +559,22 @@ ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq %rdi, %rbx -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: mulq %r10 -; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: addq %rcx, %r14 -; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r8, %rax +; X64-NEXT: adcq $0, %r8 +; X64-NEXT: movq %r10, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %r14, %rdi -; X64-NEXT: adcq %r10, %rdx +; X64-NEXT: adcq %r8, %rdx ; X64-NEXT: imulq %rcx, %r11 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; X64-NEXT: addq %rbp, %r15 @@ -583,7 +583,7 @@ ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, %r10 +; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rbp @@ -597,20 +597,20 @@ ; X64-NEXT: adcq %rbp, %rdx ; X64-NEXT: imulq %rcx, %r13 ; X64-NEXT: addq %rdx, %r13 -; X64-NEXT: addq %r15, %r10 +; X64-NEXT: addq %r15, %r8 ; X64-NEXT: adcq %rdi, %rax ; X64-NEXT: adcq %r11, %r13 -; X64-NEXT: imulq %r14, %r8 -; X64-NEXT: addq %r13, %r8 +; X64-NEXT: imulq %r14, %r10 +; X64-NEXT: addq %r13, %r10 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: imulq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: addq %rcx, %rsi -; X64-NEXT: addq %r8, %rsi +; X64-NEXT: addq %r10, %rsi +; X64-NEXT: movq %r9, 8(%r12) ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, (%r12) -; X64-NEXT: movq %r9, 8(%r12) -; X64-NEXT: movq %r10, 16(%r12) +; X64-NEXT: movq %r8, 16(%r12) ; X64-NEXT: movq %rax, 24(%r12) ; X64-NEXT: movl %esi, 32(%r12) ; X64-NEXT: shrq $32, %rsi diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -38,8 +38,8 @@ ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: subl $24, %esp -; X86-NEXT: .cfi_def_cfa_offset 44 +; X86-NEXT: subl $28, %esp +; X86-NEXT: .cfi_def_cfa_offset 48 ; X86-NEXT: .cfi_offset %esi, -20 ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 @@ -60,45 +60,42 @@ ; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %ebx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %esi, %ecx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: addl %esi, %ebp ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %esi, %ebx -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: addl %esi, %ebp -; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %ebx -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: addl %ebp, %edi +; X86-NEXT: movl %eax, %edi +; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: addl %ecx, %edi +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %esi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %edi, %ebx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ebp, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %esi, %ecx ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %ecx +; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %esi, %ecx +; X86-NEXT: addl %edi, %ecx ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -106,12 +103,12 @@ ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: mull %edi ; X86-NEXT: addl %esi, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl %ebx, %eax -; X86-NEXT: adcl %edi, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %edx ; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: testl %ebp, %ebp ; X86-NEXT: setne %cl @@ -124,10 +121,10 @@ ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload ; X86-NEXT: orb %ch, %cl ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: setne %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: testl %edi, %edi +; X86-NEXT: setne %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: testl %ebp, %ebp ; X86-NEXT: setne %bh ; X86-NEXT: andb %cl, %bh ; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload @@ -136,12 +133,12 @@ ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: setne %bl -; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: orl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl %esi, 4(%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, (%ecx) ; X86-NEXT: movl %eax, 8(%ecx) ; X86-NEXT: movl %edx, 12(%ecx) ; X86-NEXT: setne %al @@ -153,7 +150,7 @@ ; X86-NEXT: andb $1, %al ; X86-NEXT: movb %al, 16(%ecx) ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: addl $24, %esp +; X86-NEXT: addl $28, %esp ; X86-NEXT: .cfi_def_cfa_offset 20 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 16