Teach the AMDGPU promote-alloca pass to tolerate null pointer constants.

binaryOpIsDerivedFromSameAlloca now accepts a ConstantPointerNull as the
other operand. ICmp users are queued on the worklist, and when the worklist
is rewritten, null operands of icmp, select and phi instructions are replaced
with a null of the LOCAL_ADDRESS pointer type so operand types stay
consistent. Tests cover null on either side of icmp, select and phi.

Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -459,6 +459,9 @@
   if (Val == OtherOp)
     OtherOp = Inst->getOperand(OpIdx1);
 
+  if (isa<ConstantPointerNull>(OtherOp))
+    return true;
+
   Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
   if (!isa<AllocaInst>(OtherObj))
     return false;
@@ -526,6 +529,9 @@
     if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
       if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
         return false;
+
+      // May need to rewrite constant operands.
+      WorkList.push_back(ICmp);
     }
 
     if (!User->getType()->isPointerTy())
@@ -650,16 +656,45 @@
   for (Value *V : WorkList) {
     CallInst *Call = dyn_cast<CallInst>(V);
     if (!Call) {
-      Type *EltTy = V->getType()->getPointerElementType();
-      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
+        Value *Src0 = CI->getOperand(0);
+        Type *EltTy = Src0->getType()->getPointerElementType();
+        PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
+        if (isa<ConstantPointerNull>(CI->getOperand(0)))
+          CI->setOperand(0, ConstantPointerNull::get(NewTy));
+
+        if (isa<ConstantPointerNull>(CI->getOperand(1)))
+          CI->setOperand(1, ConstantPointerNull::get(NewTy));
+
+        continue;
+      }
 
       // The operand's value should be corrected on its own.
       if (isa<AddrSpaceCastInst>(V))
         continue;
 
+      Type *EltTy = V->getType()->getPointerElementType();
+      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
       // FIXME: It doesn't really make sense to try to do this for all
       // instructions.
       V->mutateType(NewTy);
+
+      // Adjust the types of any constant operands.
+      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+        if (isa<ConstantPointerNull>(SI->getOperand(1)))
+          SI->setOperand(1, ConstantPointerNull::get(NewTy));
+
+        if (isa<ConstantPointerNull>(SI->getOperand(2)))
+          SI->setOperand(2, ConstantPointerNull::get(NewTy));
+      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
+        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
+            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
+        }
+      }
+
       continue;
     }
 
Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
===================================================================
--- test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
+++ test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@@ -18,6 +18,32 @@
   ret void
 }
 
+; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
+define void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %alloca = alloca [16 x i32], align 4
+  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+  %cmp = icmp eq i32* %ptr0, null
+  %zext = zext i1 %cmp to i32
+  store volatile i32 %zext, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
+define void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %alloca = alloca [16 x i32], align 4
+  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+  %cmp = icmp eq i32* null, %ptr0
+  %zext = zext i1 %cmp to i32
+  store volatile i32 %zext, i32 addrspace(1)* %out
+  ret void
+}
+
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
 ; CHECK: %alloca = alloca [16 x i32], align 4
 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
===================================================================
--- test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
+++ test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -32,6 +32,40 @@
   ret void
 }
 
+; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
+define void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
+entry:
+  %alloca = alloca [64 x i32], align 4
+  br i1 undef, label %if, label %endif
+
+if:
+  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+  br label %endif
+
+endif:
+  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
+  store i32 0, i32* %phi.ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
+define void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
+entry:
+  %alloca = alloca [64 x i32], align 4
+  br i1 undef, label %if, label %endif
+
+if:
+  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+  br label %endif
+
+endif:
+  %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
+  store i32 0, i32* %phi.ptr, align 4
+  ret void
+}
+
 ; CHECK-LABEL: @one_phi_value(
 ; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %14
 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
===================================================================
--- test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
+++ test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -99,4 +99,34 @@
   ret void
 }
 
+; CHECK-LABEL: @select_null_rhs(
+; CHECK-NOT: alloca
+; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
+define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
+bb:
+  %tmp = alloca double, align 8
+  store double 0.000000e+00, double* %tmp, align 8
+  %tmp2 = icmp eq i32 %arg1, 0
+  %tmp3 = select i1 %tmp2, double* %tmp, double* null
+  store double 1.000000e+00, double* %tmp3, align 8
+  %tmp4 = load double, double* %tmp, align 8
+  store double %tmp4, double addrspace(1)* %arg
+  ret void
+}
+
+; CHECK-LABEL: @select_null_lhs(
+; CHECK-NOT: alloca
+; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
+define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #0 {
+bb:
+  %tmp = alloca double, align 8
+  store double 0.000000e+00, double* %tmp, align 8
+  %tmp2 = icmp eq i32 %arg1, 0
+  %tmp3 = select i1 %tmp2, double* null, double* %tmp
+  store double 1.000000e+00, double* %tmp3, align 8
+  %tmp4 = load double, double* %tmp, align 8
+  store double %tmp4, double addrspace(1)* %arg
+  ret void
+}
+
 attributes #0 = { norecurse nounwind }