Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -708,6 +708,12 @@
   void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
                    const BasicBlock *EHPadBB = nullptr);
 
+  /// If \p I carries !range metadata describing a single interval [0, Hi),
+  /// wrap \p Op in an AssertZext to the narrowest integer type that holds
+  /// every value below Hi. Returns \p Op unchanged otherwise.
+  SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+                                 SDValue Op);
+
   std::pair<SDValue, SDValue> lowerCallOperands(
           ImmutableCallSite CS,
           unsigned ArgIdx,
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3721,7 +3721,8 @@
   if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
     Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
-  }
+  } else
+    Result = lowerRangeToAssertZExt(DAG, I, Result);
 
   setValue(&I, Result);
 }
@@ -5419,8 +5420,11 @@
     .setTailCall(isTailCall);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
 
-  if (Result.first.getNode())
-    setValue(CS.getInstruction(), Result.first);
+  if (Result.first.getNode()) {
+    const Instruction *Inst = CS.getInstruction();
+    Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
+    setValue(Inst, Result.first);
+  }
 }
 
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -6716,6 +6720,55 @@
                           DAG.getSrcValue(I.getArgOperand(1))));
 }
 
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+                                                    const Instruction &I,
+                                                    SDValue Op) {
+  const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+  if (!Range)
+    return Op;
+
+  // Only a single [Lo, Hi) pair is handled. With several pairs the first one
+  // alone does not bound the value.
+  if (Range->getNumOperands() != 2)
+    return Op;
+
+  Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
+  if (!Lo->isNullValue())
+    return Op;
+
+  // Hi is exclusive, so the largest value is Hi - 1 and ceil(log2(Hi)) bits
+  // are required. The range [0, 1) would give 0 bits; use i1 instead.
+  Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
+  unsigned Bits = cast<ConstantInt>(Hi)->getValue().ceilLogBase2();
+  if (Bits == 0)
+    Bits = 1;
+
+  // AssertZext needs a type strictly narrower than the value it annotates.
+  if (Bits >= Op.getValueType().getSizeInBits())
+    return Op;
+
+  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+
+  SDLoc SL = getCurSDLoc();
+
+  SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
+                             Op, DAG.getValueType(SmallVT));
+
+  // Forward the remaining results of a multi-result node next to the
+  // asserted first value.
+  unsigned NumVals = Op.getNode()->getNumValues();
+  if (NumVals == 1)
+    return ZExt;
+
+  SmallVector<SDValue, 4> Ops;
+
+  Ops.push_back(ZExt);
+  for (unsigned Idx = 1; Idx != NumVals; ++Idx)
+    Ops.push_back(Op.getValue(Idx));
+
+  return DAG.getMergeValues(Ops, SL);
+}
+
 /// \brief Lower an argument list according to the target calling convention.
 ///
 /// \return A tuple of <return-value, token-chain>
Index: test/CodeGen/AArch64/lower-range-metadata-func-call.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/lower-range-metadata-func-call.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; and can be eliminated
+; CHECK-LABEL: {{^}}test_call_known_max_range:
+; CHECK: bl foo
+; CHECK-NOT: and
+; CHECK: ret
+define i32 @test_call_known_max_range() #0 {
+entry:
+  %id = tail call i32 @foo(), !range !0
+  %and = and i32 %id, 1023
+  ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_trunc_1_bit_range:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0x1ff
+; CHECK: ret
+define i32 @test_call_known_trunc_1_bit_range() #0 {
+entry:
+  %id = tail call i32 @foo(), !range !0
+  %and = and i32 %id, 511
+  ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_max_range_m1:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0xff
+; CHECK: ret
+define i32 @test_call_known_max_range_m1() #0 {
+entry:
+  %id = tail call i32 @foo(), !range !1
+  %and = and i32 %id, 255
+  ret i32 %and
+}
+
+; The upper bound is exclusive: [0, 1023) includes values that need 10 bits,
+; so a 9-bit mask must be kept.
+; CHECK-LABEL: {{^}}test_call_known_max_range_m1_9_bit_mask:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0x1ff
+; CHECK: ret
+define i32 @test_call_known_max_range_m1_9_bit_mask() #0 {
+entry:
+  %id = tail call i32 @foo(), !range !1
+  %and = and i32 %id, 511
+  ret i32 %and
+}
+
+
+declare i32 @foo()
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = !{i32 0, i32 1024}
+!1 = !{i32 0, i32 1023}
Index: test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-unknown < %s | FileCheck %s
+
+; and can be eliminated
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
+; CHECK-NOT: v0
+; CHECK: {{flat|buffer}}_store_dword v0
+define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+  %and = and i32 %id, 1023
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
+; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0
+; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
+define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+  %and = and i32 %id, 511
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1:
+; CHECK-NOT: v0
+; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0
+; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
+define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
+  %and = and i32 %id, 255
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = !{i32 0, i32 1024}
+!1 = !{i32 0, i32 1023}