Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13315,6 +13315,25 @@
   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
     return Res;

+  if (TLI.getTargetMachine().getTargetTriple().getArch() ==
+      llvm::Triple::aarch64) {
+    if (N0.getOpcode() == ISD::TRUNCATE) {
+
+      if (SDValue lowerLoad = reduceLoadWidth(N0.getNode())) {
+        SDNode *load = N0.getOperand(0).getNode();
+
+        if (lowerLoad.getNode() != N0.getNode()) {
+          CombineTo(N0.getNode(), lowerLoad);
+          AddToWorklist(load);
+        }
+      }
+      if (SDValue ExtLoad = CombineExtLoad(N))
+        return ExtLoad;
+      return SDValue(N, 0);
+    }
+  }
+
+
   // fold (zext (zext x)) -> (zext x)
   // fold (zext (aext x)) -> (zext x)
   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3640,12 +3640,13 @@
   const Value *SV = I.getOperand(0);
   SDValue N = getValue(SV);
   EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  SDLoc Dl = getCurSDLoc();

   unsigned SrcAS = SV->getType()->getPointerAddressSpace();
   unsigned DestAS = I.getType()->getPointerAddressSpace();

   if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
-    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+    N = DAG.getAddrSpaceCast(Dl, DestVT, N, SrcAS, DestAS);

   setValue(&I, N);
 }
@@ -4233,6 +4234,28 @@
   SmallVector<SDValue, 4> Values(NumValues);
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+  EVT PtrVT = Ptr.getValueType();
+
+
+  if (TLI.getTargetMachine().getTargetTriple().getArch() ==
+      llvm::Triple::aarch64) {
+    unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+    if ((SrcAS == 270) || (SrcAS == 271)) {
+      MVT DVT = MVT::i32;
+      MVT DestVT = MVT::i64;
+
+      Ptr = DAG.getPtrExtOrTrunc(Ptr, dl, DVT);
+      Ptr = DAG.getZExtOrTrunc(Ptr, dl, DVT);
+      if (SrcAS == 270) {
+        Ptr = DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Ptr,
+                          DAG.getTargetConstant(0, dl, DestVT));
+      } else if (SrcAS == 271) {
+        Ptr = DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Ptr,
+                          DAG.getTargetConstant(0, dl, DestVT));
+      }
+    }
+  }
+
   unsigned ChainI = 0;
   for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -4372,9 +4395,29 @@
   SDValue Src = getValue(SrcV);
   SDValue Ptr = getValue(PtrV);
+  SDLoc dl = getCurSDLoc();
+  if (TLI.getTargetMachine().getTargetTriple().getArch() ==
+      llvm::Triple::aarch64) {
+    unsigned SrcAS = PtrV->getType()->getPointerAddressSpace();
+    if ((SrcAS == 270) || (SrcAS == 271)) {
+      MVT DVT = MVT::i32;
+      MVT DestVT = MVT::i64;
+
+      Ptr = DAG.getPtrExtOrTrunc(Ptr, dl, DVT);
+      Ptr = DAG.getZExtOrTrunc(Ptr, dl, DVT);
+      if (SrcAS == 270) {
+        Ptr = DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Ptr,
+                          DAG.getTargetConstant(0, dl, DestVT));
+      } else if (SrcAS == 271) {
+        Ptr = DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Ptr,
+                          DAG.getTargetConstant(0, dl, DestVT));
+      }
+    }
+  }
+
   SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
   SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
-  SDLoc dl = getCurSDLoc();
   Align Alignment = I.getAlign();
   AAMDNodes AAInfo = I.getAAMetadata();
Index: llvm/lib/Target/AArch64/AArch64.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64.h
+++ llvm/lib/Target/AArch64/AArch64.h
@@ -106,6 +106,10 @@
 void initializeLDTLSCleanupPass(PassRegistry&);
 void initializeSMEABIPass(PassRegistry &);
 void initializeSVEIntrinsicOptsPass(PassRegistry &);
+namespace ARM64AS {
+enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
+}
+
 } // end namespace llvm

 #endif
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -478,6 +478,9 @@
   setOperationAction(ISD::XOR, MVT::i32, Custom);
   setOperationAction(ISD::XOR, MVT::i64, Custom);

+  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
+  setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
+
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
   setOperationAction(ISD::FABS, MVT::f128, Expand);
@@ -5631,6 +5634,41 @@
                      ST->getBasePtr(), ST->getMemOperand());
 }

+static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
+  SDLoc dl(Op);
+  SDValue Src = Op.getOperand(0);
+  MVT DestVT = Op.getSimpleValueType();
+  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+  unsigned SrcAS = N->getSrcAddressSpace();
+  unsigned DestAS = N->getDestAddressSpace();
+
+  assert(SrcAS != DestAS &&
+         "addrspacecast must be between different address spaces");
+
+  EVT LowerVT = MVT::i32;
+
+  if ((SrcAS == ARM64AS::PTR32_SPTR || SrcAS == ARM64AS::PTR32_UPTR) ||
+      (DestAS == ARM64AS::PTR32_SPTR || DestAS == ARM64AS::PTR32_UPTR)) {
+    Src = DAG.getPtrExtOrTrunc(Src, dl, LowerVT);
+    Src = DAG.getZExtOrTrunc(Src, dl, LowerVT);
+
+    if (SrcAS == ARM64AS::PTR32_SPTR) {
+      return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+                         DAG.getTargetConstant(0, dl, DestVT));
+    } else if (SrcAS == ARM64AS::PTR32_UPTR) {
+      return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+                         DAG.getTargetConstant(0, dl, DestVT));
+    } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
+               (DestAS == ARM64AS::PTR32_UPTR)) {
+      SDValue Ext = DAG.getAnyExtOrTrunc(Src.getOperand(0), dl, DestVT);
+      SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, LowerVT);
+      return Trunc;
+    }
+  }
+  return Op;
+}
+
 // Custom lowering for any store, vector or scalar and/or default or with
 // a truncate operations. Currently only custom lower truncate operation
 // from vector v4i16 to v4i8 or volatile stores of i128.
@@ -6071,6 +6109,8 @@
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   case ISD::SIGN_EXTEND_INREG: {
     // Only custom lower when ExtraVT has a legal byte based element type.
     EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -24003,6 +24043,11 @@
     ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
     return;
   }
+  case ISD::ADDRSPACECAST: {
+    SDValue V = LowerADDRSPACECAST(SDValue(N, 0), DAG);
+    Results.push_back(V);
+    return;
+  }
   case ISD::ATOMIC_LOAD:
   case ISD::LOAD: {
     MemSDNode *LoadNode = cast<MemSDNode>(N);
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3901,6 +3901,18 @@
     return;
   }

+  if (AArch64::GPR32RegClass.contains(DestReg) &&
+      AArch64::GPR64RegClass.contains(SrcReg)) {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
+                                                   &AArch64::GPR64spRegClass);
+    BuildMI(MBB, I, DL, get(AArch64::UBFMXri), DestRegX)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0)
+        .addImm(31);
+    return;
+  }
+
 #ifndef NDEBUG
   const TargetRegisterInfo &TRI = getRegisterInfo();
   errs() << TRI.getRegAsmName(DestReg) << " = COPY "
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7854,10 +7854,10 @@
 def : Pat<(i64 (anyext GPR32:$src)),
           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

-// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
-// then assert the extension has happened.
+// When we need to explicitly zero-extend, we use an unsigned bitfield move
+// instruction (UBFM) on the enclosing super-reg.
 def : Pat<(i64 (zext GPR32:$src)),
-          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
+          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;

 // To sign extend, we use a signed bitfield move instruction (SBFM) on the
 // containing super-reg.
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -63,7 +63,8 @@

   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
-    // Addrspacecasts are always noops.
+    if (getPointerSize(SrcAS) != getPointerSize(DestAS))
+      return false;
     return true;
   }
Index: llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
===================================================================
--- llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -127,8 +127,13 @@
     break;
   case 31:
     // *xtw is only valid for signed 64-bit operations.
-    if (Is64Bit && IsSigned)
-      AsmMnemonic = "sxtw";
+    if (Is64Bit) {
+      if (IsSigned) {
+        AsmMnemonic = "sxtw";
+      } else {
+        AsmMnemonic = "uxtw";
+      }
+    }
     break;
   }
Index: llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-mixed-ptr-sizes.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s --check-prefixes=ALL,CHECK
+
+; Source to regenerate:
+; struct Foo {
+;   int * __ptr32 p32;
+;   int * __ptr64 p64;
+;   __attribute__((address_space(9))) int *p_other;
+; };
+; void use_foo(Foo *f);
+; void test_sign_ext(Foo *f, int * __ptr32 __sptr i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; void test_zero_ext(Foo *f, int * __ptr32 __uptr i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; void test_trunc(Foo *f, int * __ptr64 i) {
+;   f->p32 = i;
+;   use_foo(f);
+; }
+; void test_noop1(Foo *f, int * __ptr32 i) {
+;   f->p32 = i;
+;   use_foo(f);
+; }
+; void test_noop2(Foo *f, int * __ptr64 i) {
+;   f->p64 = i;
+;   use_foo(f);
+; }
+; void test_null_arg(Foo *f, int * __ptr32 i) {
+;   test_noop1(f, 0);
+; }
+; void test_unrecognized(Foo *f, __attribute__((address_space(14))) int *i) {
+;   f->p32 = (int * __ptr32)i;
+;   use_foo(f);
+; }
+;
+; $ clang -cc1 -triple x86_64-windows-msvc -fms-extensions -O2 -S t.cpp
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%struct.Foo = type { ptr addrspace(270), ptr, ptr addrspace(9) }
+declare dso_local void @use_foo(ptr)
+
+define dso_local void @test_sign_ext(ptr %f, ptr addrspace(270) %i) {
+; ALL-LABEL: test_sign_ext:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: sxtw x8, w1
+; ALL-NEXT: str x8, [x0, #8]
+; ALL-NEXT: b use_foo # TAILCALL
+entry:
+  %0 = addrspacecast ptr addrspace(270) %i to ptr
+  %p64 = getelementptr inbounds %struct.Foo, ptr %f, i64 0, i32 1
+  store ptr %0, ptr %p64, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
+
+define dso_local void @test_zero_ext(ptr %f, ptr addrspace(271) %i) {
+; CHECK-LABEL: test_zero_ext:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: uxtw x8, w1
+; CHECK-NEXT: str x8, [x0, #8]
+; CHECK-NEXT: b use_foo # TAILCALL
+entry:
+  %0 = addrspacecast ptr addrspace(271) %i to ptr
+  %p64 = getelementptr inbounds %struct.Foo, ptr %f, i64 0, i32 1
+  store ptr %0, ptr %p64, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
+
+define dso_local void @test_trunc(ptr %f, ptr %i) {
+; CHECK-LABEL: test_trunc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo # TAILCALL
+entry:
+  %0 = addrspacecast ptr %i to ptr addrspace(270)
+  store ptr addrspace(270) %0, ptr %f, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
+
+define dso_local void @test_noop1(ptr %f, ptr addrspace(270) %i) {
+; ALL-LABEL: test_noop1:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: str w1, [x0]
+; ALL-NEXT: b use_foo # TAILCALL
+entry:
+  store ptr addrspace(270) %i, ptr %f, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
+
+define dso_local void @test_noop2(ptr %f, ptr %i) {
+; ALL-LABEL: test_noop2:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: str x1, [x0, #8]
+; ALL-NEXT: b use_foo # TAILCALL
+entry:
+  %p64 = getelementptr inbounds %struct.Foo, ptr %f, i64 0, i32 1
+  store ptr %i, ptr %p64, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
+
+; Test that null can be passed as a 32-bit pointer.
+define dso_local void @test_null_arg(ptr %f) {
+; ALL-LABEL: test_null_arg:
+; ALL: # %bb.0: # %entry
+; ALL-NEXT: str wzr, [x0]
+; ALL-NEXT: b use_foo
+entry:
+  call void @test_noop1(ptr %f, ptr addrspace(270) null)
+  ret void
+}
+
+; Test casts between unrecognized address spaces.
+define void @test_unrecognized(ptr %f, ptr addrspace(14) %i) {
+; CHECK-LABEL: test_unrecognized:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: str w1, [x0]
+; CHECK-NEXT: b use_foo # TAILCALL
+entry:
+  %0 = addrspacecast ptr addrspace(14) %i to ptr addrspace(270)
+  store ptr addrspace(270) %0, ptr %f, align 8
+  tail call void @use_foo(ptr %f)
+  ret void
+}
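
Note: the test above only exercises addrspacecast and store paths. A minimal sketch (not part of this patch) of an input that would exercise the visitLoad change in SelectionDAGBuilder.cpp is shown below; the function name is illustrative and no CHECK lines are asserted for it.

; Sketch only, under the same datalayout/triple as the test above: a load
; through an unsigned 32-bit (addrspace 271) pointer, whose pointer operand
; is zero-extended to a 64-bit address before the load is selected.
target datalayout = "e-m:e-i8:8:32-i16:16:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

define i32 @load_through_uptr(ptr addrspace(271) %p) {
entry:
  %v = load i32, ptr addrspace(271) %p, align 4
  ret i32 %v
}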