Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -154,7 +154,7 @@
   fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);

   LegalizeResult
-  fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+  reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);

   LegalizeResult narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -138,6 +138,11 @@
   if (!LeftoverTy.isValid()) {
     assert(LeftoverRegs.empty());

+    if (!ResultTy.isVector()) {
+      MIRBuilder.buildMerge(DstReg, PartRegs);
+      return;
+    }
+
     if (PartTy.isVector())
       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
     else
@@ -559,14 +564,11 @@
     return Legalized;
   }
   case TargetOpcode::G_LOAD: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
-      return UnableToLegalize;
-
     const auto &MMO = **MI.memoperands_begin();
     unsigned DstReg = MI.getOperand(0).getReg();
     LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.isVector())
+      return UnableToLegalize;

     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
@@ -577,40 +579,7 @@
       return Legalized;
     }

-    // This implementation doesn't work for atomics. Give up instead of doing
-    // something invalid.
-    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
-        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
-      return UnableToLegalize;
-
-    int NumParts = SizeOp0 / NarrowSize;
-    LLT OffsetTy = LLT::scalar(
-        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
-    MachineFunction &MF = MIRBuilder.getMF();
-    SmallVector<unsigned, 2> DstRegs;
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      unsigned SrcReg = 0;
-      unsigned Offset = i * NarrowSize / 8;
-
-      MachineMemOperand *SplitMMO =
-          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);
-
-      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
-                                Offset);
-
-      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
-
-      DstRegs.push_back(DstReg);
-    }
-
-    if (DstTy.isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
-    else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
-    MI.eraseFromParent();
-    return Legalized;
+    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
   }
   case TargetOpcode::G_ZEXTLOAD:
   case TargetOpcode::G_SEXTLOAD: {
@@ -640,15 +609,18 @@
     return Legalized;
   }
   case TargetOpcode::G_STORE: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
-      return UnableToLegalize;
-
     const auto &MMO = **MI.memoperands_begin();

     unsigned SrcReg = MI.getOperand(0).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
+    if (SrcTy.isVector())
+      return UnableToLegalize;
+
+    int NumParts = SizeOp0 / NarrowSize;
+    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
+    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
+    if (SrcTy.isVector() && LeftoverBits != 0)
+      return UnableToLegalize;

     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
       unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
@@ -659,34 +631,7 @@
       return Legalized;
     }

-    // This implementation doesn't work for atomics. Give up instead of doing
-    // something invalid.
-    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
-        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
-      return UnableToLegalize;
-
-    int NumParts = SizeOp0 / NarrowSize;
-    LLT OffsetTy = LLT::scalar(
-        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
-    SmallVector<unsigned, 2> SrcRegs;
-    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
-
-    MachineFunction &MF = MIRBuilder.getMF();
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = 0;
-      unsigned Offset = i * NarrowSize / 8;
-
-      MachineMemOperand *SplitMMO =
-          MF.getMachineMemOperand(&MMO, Offset, NarrowSize / 8);
-
-      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
-                                Offset);
-
-      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
-    }
-    MI.eraseFromParent();
-    return Legalized;
+    return reduceLoadStoreWidth(MI, 0, NarrowTy);
   }
   case TargetOpcode::G_CONSTANT: {
     // FIXME: add support for when SizeOp0 isn't an exact multiple of
@@ -1834,8 +1779,8 @@
 }

 LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx,
-                                              LLT NarrowTy) {
+LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+                                      LLT NarrowTy) {
   // FIXME: Don't know how to handle secondary types yet.
   if (TypeIdx != 0)
     return UnableToLegalize;
@@ -1963,7 +1908,7 @@
     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
-    return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy);
+    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
   default:
     return UnableToLegalize;
   }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -321,7 +321,7 @@
     .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-        return (MemSize == 96) &&
+        return Query.Types[0].isVector() && (MemSize == 96) &&
               ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
       },
       [=](const LegalityQuery &Query) {
Index: test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -205,7 +205,7 @@
   ret void
 }

-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3:_(s96), %0:_(p0) :: (store 12 into %ir.c, align 16) (in function: nonpow2_store_narrowing
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %5:_(s32) = G_EXTRACT %3:_(s96), 64 (in function: nonpow2_store_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing:
 define void @nonpow2_store_narrowing(i96* %c) {
@@ -215,7 +215,7 @@
   ret void
 }

-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %0:_(s96), %1:_(p0) :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_constant_narrowing)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s32) = G_EXTRACT %0:_(s96), 64 (in function: nonpow2_constant_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_constant_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_constant_narrowing:
 define void @nonpow2_constant_narrowing() {
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
@@ -246,26 +246,120 @@
 ...
 ---
-name: test_load_global_v3s32
+name: test_load_global_s96_align4
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1

-    ; SI-LABEL: name: test_load_global_v3s32
+    ; SI-LABEL: name: test_load_global_s96_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 16, addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
-    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, align 8, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
-    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
-    ; VI-LABEL: name: test_load_global_v3s32
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+    ; VI-LABEL: name: test_load_global_s96_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
     %0:_(p1) = COPY $vgpr0_vgpr1
-    %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16)
+    %1:_(s96) = G_LOAD %0 :: (load 12, addrspace 1, align 4)
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
+
+---
+name: test_load_global_s160_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_load_global_s160_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
+    ; SI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
+    ; SI: S_NOP 0, implicit [[INSERT2]](s160)
+    ; VI-LABEL: name: test_load_global_s160_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
+    ; VI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
+    ; VI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
+    ; VI: S_NOP 0, implicit [[INSERT2]](s160)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s160) = G_LOAD %0 :: (load 20, addrspace 1, align 4)
+    S_NOP 0, implicit %1
+...
+
+---
+name: test_load_global_s224_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_load_global_s224_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
+    ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
+    ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
+    ; SI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
+    ; SI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
+    ; SI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; SI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
+    ; VI-LABEL: name: test_load_global_s224_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
+    ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
+    ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
+    ; VI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
+    ; VI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
+    ; VI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
+    ; VI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
+    ; VI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
+    ; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+    ; VI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s224) = G_LOAD %0 :: (load 28, addrspace 1, align 4)
+
+    %2:_(s256) = G_IMPLICIT_DEF
+    %3:_(s256) = G_INSERT %2, %1, 0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
+
+...
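Note on the new s160/s224 load tests above: the expected MIR is just the quotient-and-remainder split of the total width by the 64-bit narrow type, with each piece loaded at the matching byte offset (G_GEP by 8, 16, 24) and reassembled with G_INSERT. Below is a minimal standalone C++ sketch of that offset arithmetic; it is illustrative only, not the LLVM API (the in-tree logic lives in LegalizerHelper::reduceLoadStoreWidth), and the helper name splitAccess is made up for the example.

// Standalone sketch (hypothetical helper, not LLVM code): compute the
// (byte offset, piece width) list a narrowed wide scalar access splits into.
#include <cstdio>
#include <utility>
#include <vector>

static std::vector<std::pair<unsigned, unsigned>>
splitAccess(unsigned TotalBits, unsigned NarrowBits) {
  std::vector<std::pair<unsigned, unsigned>> Pieces;
  unsigned NumParts = TotalBits / NarrowBits;             // full narrow pieces
  for (unsigned I = 0; I != NumParts; ++I)
    Pieces.push_back({I * NarrowBits / 8, NarrowBits});
  unsigned Leftover = TotalBits - NumParts * NarrowBits;  // smaller tail piece
  if (Leftover != 0)
    Pieces.push_back({NumParts * NarrowBits / 8, Leftover});
  return Pieces;
}

int main() {
  // s224 narrowed to s64: prints (0,64) (8,64) (16,64) (24,32), matching the
  // G_GEP offsets and load sizes in test_load_global_s224_align4 above.
  for (auto [Off, Bits] : splitAccess(224, 64))
    std::printf("offset %u bytes, width %u bits\n", Off, Bits);
  return 0;
}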
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -227,6 +227,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5

+    ; SI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 2, addrspace 1)
@@ -238,6 +248,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5

+    ; SI-LABEL: name: test_truncstore_global_s128_to_s8
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s128_to_s8
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 1, addrspace 1)
@@ -248,13 +268,6 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2

-    ; CHECK-LABEL: name: test_store_global_i1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; CHECK: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s1) = G_TRUNC %1
@@ -267,11 +280,16 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2

-    ; CHECK-LABEL: name: test_store_global_i8
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i8
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i8
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_TRUNC %1
@@ -284,27 +302,61 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2

-    ; CHECK-LABEL: name: test_store_global_i16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s16) = G_TRUNC %1
     G_STORE %2, %0 :: (store 2, addrspace 1)
 ...

+---
+name: test_store_global_96
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4
+
+    ; SI-LABEL: name: test_store_global_96
+    ; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
+    ; SI: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
+    ; SI: G_STORE [[EXTRACT]](s64), [[COPY1]](p1) :: (store 8, align 16, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY1]], [[C]](s64)
+    ; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, align 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_96
+    ; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
+    ; VI: G_STORE [[COPY]](s96), [[COPY1]](p1) :: (store 12, align 16, addrspace 1)
+    %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(p1) = COPY $vgpr3_vgpr4
+
+    G_STORE %0, %1 :: (store 12, addrspace 1, align 16)
+...
+
 ---
 name: test_store_global_i128
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5

-    ; CHECK-LABEL: name: test_store_global_i128
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i128
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i128
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 16, addrspace 1)
@@ -316,10 +368,14 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5

-    ; CHECK-LABEL: name: test_store_global_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v2s64
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v2s64
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 16, addrspace 1)
Index: test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
+++ test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -8,7 +8,7 @@
 ; the fallback path.

 ; Check that we fallback on invoke translation failures.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s80), %0:_(p0) :: (store 10 into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s16) = G_EXTRACT %1:_(s80), 64 (in function: test_x86_fp80_dump)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
 ; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
 define void @test_x86_fp80_dump(x86_fp80* %ptr){
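Note on the updated fallback expectations (arm64-fallback.ll and x86_64-fallback.ll): because non-power-of-2 stores are now narrowed instead of being rejected outright, these targets no longer report the whole G_STORE as unlegalizable; legalization performs the split and then stops at the G_EXTRACT of the leftover piece. The numbers in the new remark lines are consistent with a 64-bit narrow type: 96 = 1 * 64 + 32 gives the s32 extract at bit offset 64 in the AArch64 tests, and 80 = 1 * 64 + 16 gives the s16 extract at bit offset 64 for the x86 x86_fp80 store.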