Index: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1764,68 +1764,72 @@
   SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
          [SDNPHasChain, SDNPSideEffect]>;
 
-class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
-                !strconcat(!strconcat("ld.param", opstr),
-                           "\t$dst, [retval0+$b];"),
-                []>;
+let mayLoad = 1 in {
+  class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
+                  !strconcat(!strconcat("ld.param", opstr),
+                             "\t$dst, [retval0+$b];"),
+                  []>;
+
+  class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
+                  !strconcat("ld.param.v2", opstr,
+                             "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
+
+  class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
+                        regclass:$dst4),
+                  (ins i32imm:$b),
+                  !strconcat("ld.param.v4", opstr,
+                             "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+                  []>;
+}
 
 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                 !strconcat("mov", opstr, "\t$dst, retval$b;"),
                 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
 
-class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
-                !strconcat("ld.param.v2", opstr,
-                           "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
-
-class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
-                      regclass:$dst4),
-                (ins i32imm:$b),
-                !strconcat("ld.param.v4", opstr,
-                           "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
-                []>;
-
-class StoreParamInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
-                !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
-                []>;
-
-class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
-                             i32imm:$a, i32imm:$b),
-                !strconcat("st.param.v2", opstr,
-                           "\t[param$a+$b], {{$val, $val2}};"),
-                []>;
-
-class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
-                             regclass:$val4, i32imm:$a,
-                             i32imm:$b),
-                !strconcat("st.param.v4", opstr,
-                           "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
-                []>;
-
-class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
-                !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
-                []>;
-
-class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
-                !strconcat("st.param.v2", opstr,
-                           "\t[func_retval0+$a], {{$val, $val2}};"),
-                []>;
-
-class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs),
-                (ins regclass:$val, regclass:$val2, regclass:$val3,
-                     regclass:$val4, i32imm:$a),
-                !strconcat("st.param.v4", opstr,
-                           "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
-                []>;
+let mayStore = 1 in {
+  class StoreParamInst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
+                  !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
+                  []>;
+
+  class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
+                               i32imm:$a, i32imm:$b),
+                  !strconcat("st.param.v2", opstr,
+                             "\t[param$a+$b], {{$val, $val2}};"),
+                  []>;
+
+  class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
+                               regclass:$val4, i32imm:$a,
+                               i32imm:$b),
+                  !strconcat("st.param.v4", opstr,
+                             "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
+                  []>;
+
+  class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
+                  !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
+                  []>;
+
+  class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
+                  !strconcat("st.param.v2", opstr,
+                             "\t[func_retval0+$a], {{$val, $val2}};"),
+                  []>;
+
+  class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
+        NVPTXInst<(outs),
+                  (ins regclass:$val, regclass:$val2, regclass:$val3,
+                       regclass:$val4, i32imm:$a),
+                  !strconcat("st.param.v4", opstr,
+                             "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+                  []>;
+}
 
 let isCall=1 in {
   multiclass CALL<string OpcStr> {
Index: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1381,7 +1381,11 @@
 // Support for ldu on sm_20 or later
 //-----------------------------------
 
+// Don't annotate ldu instructions as mayLoad, as they load from memory that is
+// read-only in a kernel.
+
 // Scalar
+
 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
@@ -1477,6 +1481,10 @@
 // Support for ldg on sm_35 or later
 //-----------------------------------
 
+// Don't annotate ld.global.nc as mayLoad, because these loads go through the
+// non-coherent texture cache, and therefore the values read must be read-only
+// during the lifetime of the kernel.
+
 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),