diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1158,6 +1158,8 @@
       MyFlags.VT = RegisterVT;
       MyFlags.ArgVT = VT;
       MyFlags.Used = CLI.IsReturnValueUsed;
+      if (CLI.RetTy->isPointerTy())
+        MyFlags.Flags.setPointer();
       if (CLI.RetSExt)
         MyFlags.Flags.setSExt();
       if (CLI.RetZExt)
@@ -1178,6 +1180,8 @@
         FinalType, CLI.CallConv, CLI.IsVarArg);
 
     ISD::ArgFlagsTy Flags;
+    if (Arg.Ty->isPointerTy())
+      Flags.setPointer();
     if (Arg.IsZExt)
       Flags.setZExt();
     if (Arg.IsSExt)
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -977,9 +977,6 @@
 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
   EVT evt = TLI.getValueType(DL, Ty, true);
 
-  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
-    return false;
-
   // Only handle simple types.
   if (evt == MVT::Other || !evt.isSimple())
     return false;
@@ -1022,9 +1019,6 @@
 }
 
 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
-  if (Subtarget->isTargetILP32())
-    return false;
-
   unsigned ScaleFactor = getImplicitScaleFactor(VT);
   if (!ScaleFactor)
     return false;
@@ -1209,6 +1203,30 @@
   if (NeedExtend)
     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
 
+  bool IsILP32Pointer =
+      Subtarget->isTargetILP32() && RHS->getType()->isPointerTy();
+
+  const auto &ExtendResult = [&](unsigned ResultReg) -> unsigned {
+    if (!ResultReg || !IsILP32Pointer)
+      return ResultReg;
+
+    unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::SUBREG_TO_REG))
+        .addDef(Result64)
+        .addImm(0)
+        .addReg(ResultReg, RegState::Kill)
+        .addImm(AArch64::sub_32);
+    return Result64;
+  };
+
+  if (IsILP32Pointer) {
+    RetVT = MVT::i32;
+    LHSReg =
+        fastEmitInst_extractsubreg(MVT::i32, LHSReg, false, AArch64::sub_32);
+  }
+
   unsigned ResultReg = 0;
   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
@@ -1224,11 +1242,12 @@
                               WantResult);
 
   if (ResultReg)
-    return ResultReg;
+    return ExtendResult(ResultReg);
 
   // Only extend the RHS within the instruction if there is a valid extend type.
   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
       isValueAvailable(RHS)) {
+    assert(!RHS->getType()->isPointerTy() && "ILP32 broken");
     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
@@ -1260,6 +1279,7 @@
 
       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
+      assert(!RHS->getType()->isPointerTy() && "ILP32 broken");
       unsigned RHSReg = getRegForValue(MulLHS);
       if (!RHSReg)
         return 0;
@@ -1285,6 +1305,7 @@
         }
         uint64_t ShiftVal = C->getZExtValue();
         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
+          assert(!RHS->getType()->isPointerTy() && "ILP32 broken");
           unsigned RHSReg = getRegForValue(SI->getOperand(0));
           if (!RHSReg)
             return 0;
@@ -1302,13 +1323,18 @@
 
   unsigned RHSReg = getRegForValue(RHS);
   if (!RHSReg)
     return 0;
+
+  if (IsILP32Pointer)
+    RHSReg =
+        fastEmitInst_extractsubreg(MVT::i32, RHSReg, false, AArch64::sub_32);
+
   bool RHSIsKill = hasTrivialKill(RHS);
   if (NeedExtend)
     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
 
-  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
-                       SetFlags, WantResult);
+  return ExtendResult(emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                    RHSIsKill, SetFlags, WantResult));
 }
 
 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
@@ -1983,6 +2009,7 @@
       cast<LoadInst>(I)->isAtomic())
     return false;
 
+  MVT MemVT = TLI.getMemValueType(DL, I->getType()).getSimpleVT();
   const Value *SV = I->getOperand(0);
   if (TLI.supportSwiftError()) {
     // Swifterror values can come from either a function parameter with
@@ -2003,17 +2030,20 @@
   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
     return false;
 
-  // Fold the following sign-/zero-extend into the load instruction.
+  // Fold the following sign-/zero-extend into the load instruction. An ILP32
+  // pointer gets marked for zero-extension at this point.
   bool WantZExt = true;
   MVT RetVT = VT;
   const Value *IntExtVal = nullptr;
   if (I->hasOneUse()) {
     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
+      assert(MemVT == RetVT && "unexpected extension of pointer");
       if (isTypeSupported(ZE->getType(), RetVT))
         IntExtVal = ZE;
       else
         RetVT = VT;
     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
+      assert(MemVT == RetVT && "unexpected extension of pointer");
       if (isTypeSupported(SE->getType(), RetVT))
         IntExtVal = SE;
       else
@@ -2023,7 +2053,7 @@
   }
 
   unsigned ResultReg =
-      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
+      emitLoad(MemVT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
   if (!ResultReg)
     return false;
 
@@ -2099,11 +2129,19 @@
   }
 
   const MCInstrDesc &II = TII.get(Opc);
-  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
+  unsigned SubReg = 0;
+  if (VT == MVT::i32 && TRI.getRegSizeInBits(SrcReg, MRI) == 64) {
+    assert(VT == MVT::i32 && TRI.getRegSizeInBits(SrcReg, MRI) == 64 &&
+           Subtarget->isTargetILP32());
+    MRI.constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
+    SubReg = AArch64::sub_32;
+  } else
+    SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+
   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
-      .addReg(SrcReg)
-      .addReg(AddrReg)
+      .addUse(SrcReg, 0, SubReg)
+      .addUse(AddrReg)
       .addMemOperand(MMO);
   return true;
 }
@@ -2166,11 +2204,19 @@
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     SrcReg = ANDReg;
   }
-  // Create the base instruction, then add the operands.
+
   const MCInstrDesc &II = TII.get(Opc);
-  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+  unsigned SubReg = 0;
+  if (VT == MVT::i32 && TRI.getRegSizeInBits(SrcReg, MRI) == 64) {
+    MRI.constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
+    SubReg = AArch64::sub_32;
+  } else
+    SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+
+  // Create the base instruction, then add the operands.
   MachineInstrBuilder MIB =
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+          .addUse(SrcReg, 0, SubReg);
   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
   return true;
 }
@@ -2185,6 +2231,9 @@
   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
     return false;
 
+  auto *SI = cast<StoreInst>(I);
+  MVT MemVT =
+      TLI.getMemValueType(DL, SI->getOperand(0)->getType()).getSimpleVT();
   const Value *PtrV = I->getOperand(1);
   if (TLI.supportSwiftError()) {
     // Swifterror values can come from either a function parameter with
@@ -2205,11 +2254,11 @@
   unsigned SrcReg = 0;
   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
     if (CI->isZero())
-      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+      SrcReg = (MemVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
     if (CF->isZero() && !CF->isNegative()) {
-      VT = MVT::getIntegerVT(VT.getSizeInBits());
-      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+      MemVT = MVT::getIntegerVT(VT.getSizeInBits());
+      SrcReg = (MemVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
     }
   }
 
@@ -2219,8 +2268,6 @@
   if (!SrcReg)
     return false;
 
-  auto *SI = cast<StoreInst>(I);
-
   // Try to emit a STLR for seq_cst/release.
   if (SI->isAtomic()) {
     AtomicOrdering Ord = SI->getOrdering();
@@ -2228,7 +2275,7 @@
     if (isReleaseOrStronger(Ord)) {
       // The STLR addressing mode only supports a base reg; pass that directly.
       unsigned AddrReg = getRegForValue(PtrV);
-      return emitStoreRelease(VT, SrcReg, AddrReg,
+      return emitStoreRelease(MemVT, SrcReg, AddrReg,
                               createMachineMemOperandFor(I));
     }
   }
@@ -2238,7 +2285,7 @@
   if (!computeAddress(PtrV, Addr, Op0->getType()))
     return false;
 
-  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
+  if (!emitStore(MemVT, SrcReg, Addr, createMachineMemOperandFor(I)))
     return false;
   return true;
 }
@@ -2313,6 +2360,14 @@
   if (BW > 64)
     return false;
 
+  // Signed ILP32 comparisons must be done at 32-bit width because the pointer
+  // is zero-extended to 64 bits.
+  bool IsILP32Pointer = false;
+  if (Subtarget->isTargetILP32() && LHS->getType()->isPointerTy()) {
+    IsILP32Pointer = true;
+    BW = 32;
+  }
+
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
@@ -2397,7 +2452,7 @@
     return false;
   bool SrcIsKill = hasTrivialKill(LHS);
 
-  if (BW == 64 && !Is64Bit)
+  if ((BW == 64 && !Is64Bit) || IsILP32Pointer)
     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                         AArch64::sub_32);
 
@@ -3079,6 +3134,7 @@
   for (CCValAssign &VA : ArgLocs) {
     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
     MVT ArgVT = OutVTs[VA.getValNo()];
+    auto ArgFlags = CLI.OutFlags[VA.getValNo()];
 
     unsigned ArgReg = getRegForValue(ArgVal);
     if (!ArgReg)
@@ -3106,12 +3162,24 @@
         return false;
       break;
     }
+    case CCValAssign::Trunc: {
+      assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::i64);
+      ArgVT = MVT::i32;
+      ArgReg =
+          fastEmitInst_extractsubreg(ArgVT, ArgReg, false, AArch64::sub_32);
+      if (!ArgReg)
+        return false;
+      break;
+    }
     default:
       llvm_unreachable("Unknown arg promotion!");
     }
 
     // Now copy/store arg to correct locations.
     if (VA.isRegLoc() && !VA.needsCustom()) {
+      if (Subtarget->isTargetILP32() && ArgFlags.isPointer())
+        ArgReg = emitAnd_ri(MVT::i64, ArgReg, false, 0xffffffff);
+
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
       CLI.OutRegs.push_back(VA.getLocReg());
@@ -3202,11 +3270,6 @@
   if (IsTailCall)
     return false;
 
-  // FIXME: we could and should support this, but for now correctness at -O0 is
-  // more important.
-  if (Subtarget->isTargetILP32())
-    return false;
-
   CodeModel::Model CM = TM.getCodeModel();
   // Only support the small-addressing and large code models.
   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
@@ -5011,9 +5074,6 @@
 /// simple cases. This is because the standard fastEmit functions don't cover
 /// MUL at all and ADD is lowered very inefficientily.
 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
-  if (Subtarget->isTargetILP32())
-    return false;
-
   unsigned N = getRegForValue(I->getOperand(0));
   if (!N)
     return false;
@@ -5078,6 +5138,10 @@
     if (!N)
       return false;
   }
+
+  if (Subtarget->isTargetILP32() && !cast<GetElementPtrInst>(I)->isInBounds())
+    N = emitAnd_ri(MVT::i64, N, NIsKill, 0xffffffffu);
+
   updateValueMap(I, N);
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
--- a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
@@ -1,10 +1,11 @@
-; RUN: llc -mtriple=arm64_32-apple-ios -O0 -fast-isel %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_32-apple-ios -O0 -fast-isel -fast-isel-abort=1 %s -o - | FileCheck %s
+
 @var = global i8* null
 
 define void @test_store_release_ptr() {
 ; CHECK-LABEL: test_store_release_ptr
-; CHECK: mov [[ZERO:w[0-9]+]], wzr
-; CHECK: stlr [[ZERO]]
+; CHECK: mov {{w|x}}[[ZERO:[0-9]+]], {{w|x}}zr
+; CHECK: stlr w[[ZERO]]
   store atomic i8* null, i8** @var release, align 4
   br label %next
 
@@ -18,7 +19,7 @@
 ; CHECK-LABEL: test_struct_return:
 ; CHECK: bl _callee
 ; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32
-; CHECK-DAG: str w0
+; CHECK-DAG: mov {{w[0-9]+}}, w0
   %res = call [2 x i32] @callee()
   %res.0 = extractvalue [2 x i32] %res, 0
   store i32 %res.0, i32* %addr
@@ -47,3 +48,154 @@
   %res = insertvalue {i8*} undef, i8* %res.ptr, 0
   ret {i8*} %res
 }
+
+
+define void @test_pointer_call(i64 %in) {
+; CHECK-LABEL: test_pointer_call:
+; CHECK: and x0, x0, #0xffffffff
+; CHECK: bl _test_struct_return
+
+  ; Call a random function taking a pointer. Ignore the name.
+  %ptr = inttoptr i64 %in to i32*
+  call void @test_struct_return(i32* %ptr)
+  ret void
+}
+
+define void @test_stack_pointer_call() {
+; CHECK-LABEL: test_stack_pointer_call:
+; CHECK: add x[[VAR:[0-9]+]], sp, #
+; CHECK: mov w[[VAR1:[0-9]+]], w[[VAR]]
+; CHECK-DAG: str w[[VAR1]], [sp]
+; CHECK-DAG: str w[[VAR]], [sp, #4]
+
+  %var = alloca i8
+  call i8* @test_stack_pointer_arg(i64 undef, i64 undef, i64 undef, i64 undef,
+                                   i64 undef, i64 undef, i64 undef, i64 undef,
+                                   i8* %var, i8* %var)
+  ret void
+}
+
+define i8* @test_stack_pointer_arg(i64, i64, i64, i64, i64, i64, i64, i64, i8* %in1, i8* %in2) {
+; CHECK-LABEL: test_stack_pointer_arg:
+; CHECK: ldr [[IN1:w[0-9]+]], [sp]
+; CHECK: mov w[[IN1_TMP:[0-9]+]], [[IN1]]
+; CHECK: and x0, x[[IN1_TMP]], #0xffffffff
+
+  ret i8* %in1
+}
+
+define i8* @test_load_ptr(i8** %addr) {
+; CHECK-LABEL: test_load_ptr:
+; CHECK: ldr [[VAL:w[0-9]+]], [x0, #12]
+; CHECK: mov w[[TMP:[0-9]+]], [[VAL]]
+; CHECK: and x0, x[[TMP]], #0xffffffff
+
+  %elt = getelementptr i8*, i8** %addr, i64 3
+  %val = load i8*, i8** %elt
+  ret i8* %val
+}
+
+define i64 @test_ext_load(i32* %addr) {
+; CHECK-LABEL: test_ext_load:
+; CHECK: ldrsw x0, [x0]
+
+  %val = load i32, i32* %addr
+  %res = sext i32 %val to i64
+  ret i64 %res
+}
+
+define void @test_store_ptr(i8* %in, i8** %addr) {
+; CHECK-LABEL: test_store_ptr:
+; CHECK: str w0, [x1, #12]
+
+  %elt = getelementptr i8*, i8** %addr, i64 3
+  store i8* %in, i8** %elt
+  ret void
+}
+
+define i8* @test_gep(i8* %in) {
+; CHECK-LABEL: test_gep:
+; CHECK: add [[SUM:x[0-9]+]], x0, #12
+; CHECK: and [[MASK:x[0-9]+]], [[SUM]], #0xffffffff
+; CHECK: and x0, [[MASK]], #0xffffffff
+  %res = getelementptr i8, i8* %in, i32 12
+  ret i8* %res
+}
+
+define i8* @test_gep_inbounds(i8* %in) {
+; CHECK-LABEL: test_gep_inbounds:
+; CHECK: add [[SUM:x[0-9]+]], x0, #12
+; CHECK: and x0, [[SUM]], #0xffffffff
+; CHECK-NEXT: ret
+  %res = getelementptr inbounds i8, i8* %in, i32 12
+  ret i8* %res
+}
+
+define i1 @test_cmp_bitfield(i8* %in) {
+; CHECK-LABEL: test_cmp_bitfield:
+; CHECK: ubfx {{x[0-9]+}}, x0, #31, #1
+
+  %tst = icmp slt i8* %in, null
+  ret i1 %tst
+}
+
+declare void @foo()
+declare void @bar()
+define void @test_cmp_cbnz(i8* %in) {
+; CHECK-LABEL: test_cmp_cbnz:
+; CHECK: cbnz w0
+
+  %tst = icmp eq i8* %in, null
+  br i1 %tst, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+define void @test_cmp_imm(i8* %in) {
+; CHECK-LABEL: test_cmp_imm:
+; CHECK: subs {{w[0-9]+}}, w0, #41
+; CHECK: b.hi
+
+  %tst = icmp ult i8* %in, inttoptr(i32 42 to i8*)
+  br i1 %tst, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+define void @test_cmp_reg(i8* %lhs, i8* %rhs) {
+; CHECK-LABEL: test_cmp_reg:
+; CHECK: cmp w0, w1
+; CHECK: b.hs
+
+  %tst = icmp ult i8* %lhs, %rhs
+  br i1 %tst, label %true, label %false
+
+true:
+  call void @foo()
+  ret void
+
+false:
+  call void @bar()
+  ret void
+}
+
+define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) {
+; CHECK-LABEL: test_select_ptr:
+; CHECK: tst w0, #0
+; CHECK: csel [[TMP:x[0-9]+]], x1, x2, ne
+; CHECK: and x0, [[TMP]], #0xffffffff
+  %res = select i1 %tst, i8* %lhs, i8* %rhs
+  ret i8* %res
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-null.ll b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
--- a/llvm/test/CodeGen/AArch64/arm64_32-null.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
@@ -1,10 +1,10 @@
-; RUN: llc -fast-isel=true -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s
-; RUN: llc -fast-isel=false -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s
+; RUN: llc -fast-isel=true -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
+; RUN: llc -fast-isel=false -global-isel=false -O0 -mtriple=arm64_32-apple-ios %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT
 
 define void @test_store(i8** %p) {
 ; CHECK-LABEL: test_store:
-; CHECK: mov [[R1:w[0-9]+]], wzr
-; CHECK: str [[R1]], [x0]
+; CHECK: mov {{x|w}}[[R1:[0-9]+]], {{x|w}}zr
+; CHECK: str w[[R1]], [x0]
 
   store i8* null, i8** %p
   ret void
@@ -16,8 +16,10 @@
 ; CHECK: str [[R1]], [sp]
 ; CHECK: b [[BB:LBB[0-9_]+]]
 ; CHECK: [[BB]]:
-; CHECK: ldr x0, [sp]
-; CHECK: str w0, [x{{.*}}]
+; CHECK-OPT: ldr x0, [sp]
+; CHECK-OPT: str w0
+; CHECK-FAST: ldr x[[R2:[0-9]+]], [sp]
+; CHECK-FAST: str w[[R2]], [x{{.*}}]
 
 bb0:
   br label %bb1
diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll
--- a/llvm/test/CodeGen/AArch64/arm64_32.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32.ll
@@ -37,7 +37,9 @@
 define i32 @test_global_value() {
 ; CHECK-LABEL: test_global_value:
 ; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE
-; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF]
+; CHECK-OPT: ldr w0, [x[[PAGE]], _var32@PAGEOFF]
+; CHECK-FAST: add x[[VAR32:[0-9]+]], x[[PAGE]], _var32@PAGEOFF
+; CHECK-FAST: ldr w0, [x[[VAR32]]]
   %val = load i32, i32* @var32, align 4
   ret i32 %val
 }
@@ -60,8 +62,9 @@
 define i32 @test_safe_indexed_add() {
 ; CHECK-LABEL: test_safe_indexed_add:
 ; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF
-; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
-; CHECK: ldr w0, [x[[ADDR]]]
+; CHECK-OPT: add w[[ADDR:[0-9]+]], w[[VAR32]], #32
+; CHECK-OPT: ldr w0, [x[[ADDR]]]
+; CHECK-FAST: ldr w0, [x[[VAR32]], #32]
   %addr_int = ptrtoint i32* @var32 to i64
   %addr_plus_32 = add nuw i64 %addr_int, 32
   %addr = inttoptr i64 %addr_plus_32 to i32*
@@ -270,8 +273,10 @@
 ; Safe to use the unextended address here
 define void @test_indirect_safe_call(i32* %weird_funcs) {
 ; CHECK-LABEL: test_indirect_safe_call:
-; CHECK: add w[[ADDR32:[0-9]+]], w0, #4
+; CHECK-OPT: add w[[ADDR32:[0-9]+]], w0, #4
 ; CHECK-OPT-NOT: ubfx
+; CHECK-FAST: add [[TMP:x[0-9]+]], x0, #4
+; CHECK-FAST: and x[[ADDR32:[0-9]+]], [[TMP]], #0xffffffff
 ; CHECK: blr x[[ADDR32]]
   %addr = getelementptr i32, i32* %weird_funcs, i32 1
   %func = bitcast i32* %addr to void()*
@@ -536,7 +541,9 @@
 ; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]]
 ; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]]
 
-; CHECK: add x0, sp, #{{[0-9]+}}
+; CHECK-OPT: add x0, sp, #{{[0-9]+}}
+; CHECK-FAST: add [[TMP:x[0-9]+]], sp, #{{[0-9]+}}
+; CHECK-FAST: and x0, [[TMP]], #0xffffffff
 ; CHECK: bl _callee
 
 ; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE
@@ -560,6 +567,8 @@
 ; CHECK-LABEL: test_landingpad_marshalling:
 ; CHECK-OPT: mov x2, x1
 ; CHECK-OPT: mov x1, x0
+; CHECK-FAST: mov x2, x1
+; CHECK-FAST: and x1, x0, #0xffffffff
 ; CHECK: bl _eat_landingpad_args
   invoke void @callee([8 x i32]* undef) to label %done unwind label %lpad
 
@@ -591,7 +600,9 @@
 
 define void @test_asm_memory(i32* %base.addr) {
 ; CHECK-LABEL: test_asm_memory:
-; CHECK: add w[[ADDR:[0-9]+]], w0, #4
+; CHECK-OPT: add w[[ADDR:[0-9]+]], w0, #4
+; CHECK-FAST: add [[TMP:x[0-9]+]], x0, #4
+; CHECK-FAST: and x[[ADDR:[0-9]+]], [[TMP]], #0xffffffff
 ; CHECK: str wzr, [x[[ADDR]]
   %addr = getelementptr i32, i32* %base.addr, i32 1
   call void asm sideeffect "str wzr, $0", "*m"(i32* %addr)
@@ -705,8 +716,9 @@
 ; CHECK-OPT-NEXT: smaddl x0, w1, w[[SIZE]], x0
 ; CHECK-OPT-NEXT: ret
 
-; CHECK-FAST: mov w[[SIZE:[0-9]+]], #18
-; CHECK-FAST-NEXT: smaddl [[TMP:x[0-9]+]], w1, w[[SIZE]], x0
+; CHECK-FAST: sxtw [[EXT:x[0-9]+]], w1
+; CHECK-FAST-NEXT: mov [[SIZE:x[0-9]+]], #18
+; CHECK-FAST-NEXT: madd [[TMP:x[0-9]+]], [[EXT]], [[SIZE]], x0
 ; CHECK-FAST-NEXT: and x0, [[TMP]], #0xffffffff
 ; CHECK-FAST-NEXT: ret
   %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1
diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -54,7 +54,8 @@
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov [[ID:x[0-9]+]], x21
 ; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+; CHECK-O0-ARM64_32: mov [[TMP:w[0-9]+]], w21
+; CHECK-O0-ARM64_32: cbnz [[TMP]]
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -95,7 +96,8 @@
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov [[ID:x[0-9]+]], x21
 ; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+; CHECK-O0-ARM64_32: mov [[TMP:w[0-9]+]], w21
+; CHECK-O0-ARM64_32: cbnz [[TMP]]
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -304,7 +306,8 @@
 ; CHECK-O0: bl {{.*}}foo_sret
 ; CHECK-O0: mov [[ID2:x[0-9]+]], x21
 ; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+; CHECK-O0-ARM64_32: mov [[TMP:w[0-9]+]], w21
+; CHECK-O0-ARM64_32: cbnz [[TMP]]
 ; Access part of the error object and save it to error_ref
 ; reload from stack
 ; CHECK-O0: ldrb [[CODE:w[0-9]+]]
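Illustrative sketch (separate from the patch itself): the rule the new FastISel code and its tests keep exercising is that an arm64_32 pointer is a 32-bit value carried in a 64-bit register whose upper half may hold garbage, so it is re-masked with #0xffffffff wherever it can escape: outgoing call arguments, returned pointers, and non-inbounds GEP results. A minimal lit-style case in the same spirit as arm64_32-fastisel.ll; the function name and the #16 offset are made up for illustration, and the expected assembly simply mirrors the test_gep pattern above.

; RUN: llc -mtriple=arm64_32-apple-ios -O0 -fast-isel -fast-isel-abort=1 %s -o - | FileCheck %s
define i8* @test_gep_sketch(i8* %in) {
; CHECK-LABEL: test_gep_sketch:
; CHECK: add [[SUM:x[0-9]+]], x0, #16
; CHECK: and x0, {{x[0-9]+}}, #0xffffffff
  ; A plain (non-inbounds) GEP may wrap past 4 GiB, so FastISel masks the
  ; result back down to 32 bits before it is returned.
  %res = getelementptr i8, i8* %in, i32 16
  ret i8* %res
}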